summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 3
-rw-r--r-- kernel/acct.c 3
-rw-r--r-- kernel/audit_tree.c 13
-rw-r--r-- kernel/bounds.c 2
-rw-r--r-- kernel/cpu.c 23
-rw-r--r-- kernel/cpuset.c 18
-rw-r--r-- kernel/exit.c 42
-rw-r--r-- kernel/fork.c 20
-rw-r--r-- kernel/futex.c 50
-rw-r--r-- kernel/hrtimer.c 170
-rw-r--r-- kernel/hw_breakpoint.c 146
-rw-r--r-- kernel/irq/autoprobe.c 20
-rw-r--r-- kernel/irq/chip.c 86
-rw-r--r-- kernel/irq/handle.c 22
-rw-r--r-- kernel/irq/internals.h 2
-rw-r--r-- kernel/irq/manage.c 52
-rw-r--r-- kernel/irq/migration.c 2
-rw-r--r-- kernel/irq/numa_migrate.c 8
-rw-r--r-- kernel/irq/pm.c 8
-rw-r--r-- kernel/irq/proc.c 4
-rw-r--r-- kernel/irq/spurious.c 16
-rw-r--r-- kernel/itimer.c 7
-rw-r--r-- kernel/kexec.c 61
-rw-r--r-- kernel/kgdb.c 56
-rw-r--r-- kernel/ksysfs.c 21
-rw-r--r-- kernel/lockdep.c 31
-rw-r--r-- kernel/module.c 191
-rw-r--r-- kernel/mutex-debug.h 12
-rw-r--r-- kernel/panic.c 3
-rw-r--r-- kernel/params.c 8
-rw-r--r-- kernel/perf_event.c 202
-rw-r--r-- kernel/pid.c 12
-rw-r--r-- kernel/pm_qos_params.c 20
-rw-r--r-- kernel/posix-cpu-timers.c 5
-rw-r--r-- kernel/power/Makefile 2
-rw-r--r-- kernel/power/console.c 7
-rw-r--r-- kernel/power/hibernate.c 30
-rw-r--r-- kernel/power/main.c 1
-rw-r--r-- kernel/power/process.c 14
-rw-r--r-- kernel/power/swap.c 107
-rw-r--r-- kernel/power/swsusp.c 130
-rw-r--r-- kernel/printk.c 119
-rw-r--r-- kernel/rcutorture.c 8
-rw-r--r-- kernel/relay.c 2
-rw-r--r-- kernel/resource.c 26
-rw-r--r-- kernel/rtmutex-debug.c 4
-rw-r--r-- kernel/rtmutex.c 106
-rw-r--r-- kernel/sched.c 454
-rw-r--r-- kernel/sched_cpupri.c 10
-rw-r--r-- kernel/sched_cpupri.h 2
-rw-r--r-- kernel/sched_debug.c 17
-rw-r--r-- kernel/sched_fair.c 155
-rw-r--r-- kernel/sched_features.h 5
-rw-r--r-- kernel/sched_idletask.c 6
-rw-r--r-- kernel/sched_rt.c 62
-rw-r--r-- kernel/signal.c 38
-rw-r--r-- kernel/slow-work.c 7
-rw-r--r-- kernel/smp.c 35
-rw-r--r-- kernel/softirq.c 4
-rw-r--r-- kernel/softlockup.c 54
-rw-r--r-- kernel/spinlock.c 306
-rw-r--r-- kernel/sys.c 22
-rw-r--r-- kernel/sys_ni.c 3
-rw-r--r-- kernel/sysctl.c 931
-rw-r--r-- kernel/sysctl_binary.c 1514
-rw-r--r-- kernel/sysctl_check.c 1376
-rw-r--r-- kernel/time.c 1
-rw-r--r-- kernel/time/clockevents.c 45
-rw-r--r-- kernel/time/clocksource.c 105
-rw-r--r-- kernel/time/tick-broadcast.c 42
-rw-r--r-- kernel/time/tick-common.c 20
-rw-r--r-- kernel/time/tick-internal.h 1
-rw-r--r-- kernel/time/tick-oneshot.c 4
-rw-r--r-- kernel/time/tick-sched.c 141
-rw-r--r-- kernel/time/timecompare.c 8
-rw-r--r-- kernel/time/timekeeping.c 125
-rw-r--r-- kernel/time/timer_list.c 25
-rw-r--r-- kernel/time/timer_stats.c 18
-rw-r--r-- kernel/timer.c 2
-rw-r--r-- kernel/trace/ftrace.c 30
-rw-r--r-- kernel/trace/power-traces.c 2
-rw-r--r-- kernel/trace/ring_buffer.c 45
-rw-r--r-- kernel/trace/trace.c 307
-rw-r--r-- kernel/trace/trace.h 27
-rw-r--r-- kernel/trace/trace_clock.c 8
-rw-r--r-- kernel/trace/trace_event_profile.c 6
-rw-r--r-- kernel/trace/trace_events.c 41
-rw-r--r-- kernel/trace/trace_export.c 4
-rw-r--r-- kernel/trace/trace_functions_graph.c 169
-rw-r--r-- kernel/trace/trace_hw_branches.c 51
-rw-r--r-- kernel/trace/trace_irqsoff.c 2
-rw-r--r-- kernel/trace/trace_kprobe.c 46
-rw-r--r-- kernel/trace/trace_ksym.c 61
-rw-r--r-- kernel/trace/trace_output.c 75
-rw-r--r-- kernel/trace/trace_sched_wakeup.c 16
-rw-r--r-- kernel/trace/trace_selftest.c 4
-rw-r--r-- kernel/trace/trace_stack.c 16
-rw-r--r-- kernel/trace/trace_syscalls.c 18
-rw-r--r-- kernel/user-return-notifier.c 44
-rw-r--r-- kernel/utsname_sysctl.c 31
-rw-r--r-- kernel/workqueue.c 131
101 files changed, 4418 insertions, 4149 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 982c50e2ce53..864ff75d65f2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -4,7 +4,7 @@
obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \
- sysctl.o capability.o ptrace.o timer.o user.o \
+ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
@@ -99,6 +99,7 @@ obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/acct.c b/kernel/acct.c
index 9a4715a2f6bf..a6605ca921b6 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -536,7 +536,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
do_div(elapsed, AHZ);
ac.ac_btime = get_seconds() - elapsed;
/* we really need to bite the bullet and change layout */
- current_uid_gid(&ac.ac_uid, &ac.ac_gid);
+ ac.ac_uid = orig_cred->uid;
+ ac.ac_gid = orig_cred->gid;
#if ACCT_VERSION==2
ac.ac_ahz = AHZ;
#endif
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2451dc6f3282..4b05bd9479db 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,7 +277,7 @@ static void untag_chunk(struct node *p)
owner->root = NULL;
}
- for (i = j = 0; i < size; i++, j++) {
+ for (i = j = 0; j <= size; i++, j++) {
struct audit_tree *s;
if (&chunk->owners[j] == p) {
list_del_init(&p->list);
@@ -290,7 +290,7 @@ static void untag_chunk(struct node *p)
if (!s) /* result of earlier fallback */
continue;
get_tree(s);
- list_replace_init(&chunk->owners[i].list, &new->owners[j].list);
+ list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}
list_replace_rcu(&chunk->hash, &new->hash);
@@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
- put_inotify_watch(watch);
+ put_inotify_watch(&old->watch);
return 0;
}
}
spin_unlock(&hash_lock);
chunk = alloc_chunk(old->count + 1);
- if (!chunk)
+ if (!chunk) {
+ put_inotify_watch(&old->watch);
return -ENOMEM;
+ }
mutex_lock(&inode->inotify_mutex);
if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
@@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_unlock(&hash_lock);
inotify_evict_watch(&old->watch);
mutex_unlock(&inode->inotify_mutex);
- put_inotify_watch(&old->watch);
+ put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
+ put_inotify_watch(&old->watch); /* and kill it */
return 0;
}
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 3c5301381837..98a51f26c136 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -12,7 +12,7 @@
void foo(void)
{
- /* The enum constants to put into include/linux/bounds.h */
+ /* The enum constants to put into include/generated/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
/* End of constants */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ba0f1ecb212..291ac586f37f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -212,6 +212,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
hcpu, -1, &nr_calls);
if (err == NOTIFY_BAD) {
+ set_cpu_active(cpu, true);
+
nr_calls--;
__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu, nr_calls, NULL);
@@ -223,11 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
/* Ensure that we are not runnable on dying cpu */
cpumask_copy(old_allowed, &current->cpus_allowed);
- set_cpus_allowed_ptr(current,
- cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
+ set_cpus_allowed_ptr(current, cpu_active_mask);
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
+ set_cpu_active(cpu, true);
/* CPU didn't die: tell everyone. Can't complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu) == NOTIFY_BAD)
@@ -292,9 +294,6 @@ int __ref cpu_down(unsigned int cpu)
err = _cpu_down(cpu, 0);
- if (cpu_online(cpu))
- set_cpu_active(cpu, true);
-
out:
cpu_maps_update_done();
stop_machine_destroy();
@@ -387,15 +386,23 @@ int disable_nonboot_cpus(void)
* with the userspace trying to use the CPU hotplug at the same time
*/
cpumask_clear(frozen_cpus);
+
+ for_each_online_cpu(cpu) {
+ if (cpu == first_cpu)
+ continue;
+ set_cpu_active(cpu, false);
+ }
+
+ synchronize_sched();
+
printk("Disabling non-boot CPUs ...\n");
for_each_online_cpu(cpu) {
if (cpu == first_cpu)
continue;
error = _cpu_down(cpu, 1);
- if (!error) {
+ if (!error)
cpumask_set_cpu(cpu, frozen_cpus);
- printk("CPU%d is down\n", cpu);
- } else {
+ else {
printk(KERN_ERR "Error taking CPU%d down: %d\n",
cpu, error);
break;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3cf2183b472d..ba401fab459f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -737,7 +737,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
{
}
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
struct sched_domain_attr **attributes)
{
*domains = NULL;
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (retval < 0)
return retval;
- if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
+ if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
return -EINVAL;
}
retval = validate_change(cs, trialcs);
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
}
/* Continue past cpusets with all cpus, mems online */
- if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
+ if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
- cpu_online_mask);
+ cpu_active_mask);
nodes_and(cp->mems_allowed, cp->mems_allowed,
node_states[N_HIGH_MEMORY]);
mutex_unlock(&callback_mutex);
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
switch (phase) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
break;
default:
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
cgroup_lock();
mutex_lock(&callback_mutex);
- cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+ cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
mutex_unlock(&callback_mutex);
scan_for_empty_cpusets(&top_cpuset);
ndoms = generate_sched_domains(&doms, &attr);
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
void __init cpuset_init_smp(void)
{
- cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+ cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_track_online_cpus, 0);
diff --git a/kernel/exit.c b/kernel/exit.c
index 80ae941cfd2e..546774a31a66 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -68,10 +68,10 @@ static void __unhash_process(struct task_struct *p)
detach_pid(p, PIDTYPE_SID);
list_del_rcu(&p->tasks);
+ list_del_init(&p->sibling);
__get_cpu_var(process_counts)--;
}
list_del_rcu(&p->thread_group);
- list_del_init(&p->sibling);
}
/*
@@ -736,12 +736,9 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
/*
* Any that need to be release_task'd are put on the @dead list.
*/
-static void reparent_thread(struct task_struct *father, struct task_struct *p,
+static void reparent_leader(struct task_struct *father, struct task_struct *p,
struct list_head *dead)
{
- if (p->pdeath_signal)
- group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
-
list_move_tail(&p->sibling, &p->real_parent->children);
if (task_detached(p))
@@ -780,12 +777,18 @@ static void forget_original_parent(struct task_struct *father)
reaper = find_new_reaper(father);
list_for_each_entry_safe(p, n, &father->children, sibling) {
- p->real_parent = reaper;
- if (p->parent == father) {
- BUG_ON(task_ptrace(p));
- p->parent = p->real_parent;
- }
- reparent_thread(father, p, &dead_children);
+ struct task_struct *t = p;
+ do {
+ t->real_parent = reaper;
+ if (t->parent == father) {
+ BUG_ON(task_ptrace(t));
+ t->parent = t->real_parent;
+ }
+ if (t->pdeath_signal)
+ group_send_sig_info(t->pdeath_signal,
+ SEND_SIG_NOINFO, t);
+ } while_each_thread(p, t);
+ reparent_leader(father, p, &dead_children);
}
write_unlock_irq(&tasklist_lock);
@@ -933,7 +936,7 @@ NORET_TYPE void do_exit(long code)
* an exiting task cleaning up the robust pi futexes.
*/
smp_mb();
- spin_unlock_wait(&tsk->pi_lock);
+ raw_spin_unlock_wait(&tsk->pi_lock);
if (unlikely(in_atomic()))
printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -971,7 +974,7 @@ NORET_TYPE void do_exit(long code)
exit_thread();
cgroup_exit(tsk, 1);
- if (group_dead && tsk->signal->leader)
+ if (group_dead)
disassociate_ctty(1);
module_put(task_thread_info(tsk)->exec_domain->module);
@@ -1009,7 +1012,7 @@ NORET_TYPE void do_exit(long code)
tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
- exit_io_context();
+ exit_io_context(tsk);
if (tsk->splice_pipe)
__free_pipe_info(tsk->splice_pipe);
@@ -1551,14 +1554,9 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
struct task_struct *p;
list_for_each_entry(p, &tsk->children, sibling) {
- /*
- * Do not consider detached threads.
- */
- if (!task_detached(p)) {
- int ret = wait_consider_task(wo, 0, p);
- if (ret)
- return ret;
- }
+ int ret = wait_consider_task(wo, 0, p);
+ if (ret)
+ return ret;
}
return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index 3d6f121bbe8a..5b2959b3ffc2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -64,6 +64,7 @@
#include <linux/magic.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -249,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto out;
setup_thread_stack(tsk, orig);
+ clear_user_return_notifier(tsk);
stackend = end_of_stack(tsk);
*stackend = STACK_END_MAGIC; /* for overflow detection */
@@ -937,9 +939,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
static void rt_mutex_init_task(struct task_struct *p)
{
- spin_lock_init(&p->pi_lock);
+ raw_spin_lock_init(&p->pi_lock);
#ifdef CONFIG_RT_MUTEXES
- plist_head_init(&p->pi_waiters, &p->pi_lock);
+ plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
p->pi_blocked_on = NULL;
#endif
}
@@ -1125,6 +1127,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+ p->memcg_batch.do_batch = 0;
+ p->memcg_batch.memcg = NULL;
+#endif
p->bts = NULL;
@@ -1204,9 +1210,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->sas_ss_sp = p->sas_ss_size = 0;
/*
- * Syscall tracing should be turned off in the child regardless
- * of CLONE_PTRACE.
+ * Syscall tracing and stepping should be turned off in the
+ * child regardless of CLONE_PTRACE.
*/
+ user_disable_single_step(p);
clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1284,7 +1291,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
if (likely(p->pid)) {
- list_add_tail(&p->sibling, &p->real_parent->children);
tracehook_finish_clone(p, clone_flags, trace);
if (thread_group_leader(p)) {
@@ -1296,6 +1302,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->signal->tty = tty_kref_get(current->signal->tty);
attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
attach_pid(p, PIDTYPE_SID, task_session(current));
+ list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
__get_cpu_var(process_counts)++;
}
@@ -1315,7 +1322,8 @@ bad_fork_free_pid:
if (pid != &init_struct_pid)
free_pid(pid);
bad_fork_cleanup_io:
- put_io_context(p->io_context);
+ if (p->io_context)
+ exit_io_context(p);
bad_fork_cleanup_namespaces:
exit_task_namespaces(p);
bad_fork_cleanup_mm:
diff --git a/kernel/futex.c b/kernel/futex.c
index d73ef1f3e55d..8e3c3ffe1b9a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -403,9 +403,9 @@ static void free_pi_state(struct futex_pi_state *pi_state)
* and has cleaned up the pi_state already
*/
if (pi_state->owner) {
- spin_lock_irq(&pi_state->owner->pi_lock);
+ raw_spin_lock_irq(&pi_state->owner->pi_lock);
list_del_init(&pi_state->list);
- spin_unlock_irq(&pi_state->owner->pi_lock);
+ raw_spin_unlock_irq(&pi_state->owner->pi_lock);
rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
}
@@ -470,18 +470,18 @@ void exit_pi_state_list(struct task_struct *curr)
* pi_state_list anymore, but we have to be careful
* versus waiters unqueueing themselves:
*/
- spin_lock_irq(&curr->pi_lock);
+ raw_spin_lock_irq(&curr->pi_lock);
while (!list_empty(head)) {
next = head->next;
pi_state = list_entry(next, struct futex_pi_state, list);
key = pi_state->key;
hb = hash_futex(&key);
- spin_unlock_irq(&curr->pi_lock);
+ raw_spin_unlock_irq(&curr->pi_lock);
spin_lock(&hb->lock);
- spin_lock_irq(&curr->pi_lock);
+ raw_spin_lock_irq(&curr->pi_lock);
/*
* We dropped the pi-lock, so re-check whether this
* task still owns the PI-state:
@@ -495,15 +495,15 @@ void exit_pi_state_list(struct task_struct *curr)
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
pi_state->owner = NULL;
- spin_unlock_irq(&curr->pi_lock);
+ raw_spin_unlock_irq(&curr->pi_lock);
rt_mutex_unlock(&pi_state->pi_mutex);
spin_unlock(&hb->lock);
- spin_lock_irq(&curr->pi_lock);
+ raw_spin_lock_irq(&curr->pi_lock);
}
- spin_unlock_irq(&curr->pi_lock);
+ raw_spin_unlock_irq(&curr->pi_lock);
}
static int
@@ -558,7 +558,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
* change of the task flags, we do this protected by
* p->pi_lock:
*/
- spin_lock_irq(&p->pi_lock);
+ raw_spin_lock_irq(&p->pi_lock);
if (unlikely(p->flags & PF_EXITING)) {
/*
* The task is on the way out. When PF_EXITPIDONE is
@@ -567,7 +567,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
*/
int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
- spin_unlock_irq(&p->pi_lock);
+ raw_spin_unlock_irq(&p->pi_lock);
put_task_struct(p);
return ret;
}
@@ -586,7 +586,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &p->pi_state_list);
pi_state->owner = p;
- spin_unlock_irq(&p->pi_lock);
+ raw_spin_unlock_irq(&p->pi_lock);
put_task_struct(p);
@@ -760,7 +760,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
if (!pi_state)
return -EINVAL;
- spin_lock(&pi_state->pi_mutex.wait_lock);
+ raw_spin_lock(&pi_state->pi_mutex.wait_lock);
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
/*
@@ -789,23 +789,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
else if (curval != uval)
ret = -EINVAL;
if (ret) {
- spin_unlock(&pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
return ret;
}
}
- spin_lock_irq(&pi_state->owner->pi_lock);
+ raw_spin_lock_irq(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
- spin_unlock_irq(&pi_state->owner->pi_lock);
+ raw_spin_unlock_irq(&pi_state->owner->pi_lock);
- spin_lock_irq(&new_owner->pi_lock);
+ raw_spin_lock_irq(&new_owner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &new_owner->pi_state_list);
pi_state->owner = new_owner;
- spin_unlock_irq(&new_owner->pi_lock);
+ raw_spin_unlock_irq(&new_owner->pi_lock);
- spin_unlock(&pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
rt_mutex_unlock(&pi_state->pi_mutex);
return 0;
@@ -1010,7 +1010,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.lock = &hb2->lock;
+ q->list.plist.spinlock = &hb2->lock;
#endif
}
get_futex_key_refs(key2);
@@ -1046,7 +1046,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
q->lock_ptr = &hb->lock;
#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.lock = &hb->lock;
+ q->list.plist.spinlock = &hb->lock;
#endif
wake_up_state(q->task, TASK_NORMAL);
@@ -1394,7 +1394,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
plist_node_init(&q->list, prio);
#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.lock = &hb->lock;
+ q->list.plist.spinlock = &hb->lock;
#endif
plist_add(&q->list, &hb->chain);
q->task = current;
@@ -1529,18 +1529,18 @@ retry:
* itself.
*/
if (pi_state->owner != NULL) {
- spin_lock_irq(&pi_state->owner->pi_lock);
+ raw_spin_lock_irq(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
- spin_unlock_irq(&pi_state->owner->pi_lock);
+ raw_spin_unlock_irq(&pi_state->owner->pi_lock);
}
pi_state->owner = newowner;
- spin_lock_irq(&newowner->pi_lock);
+ raw_spin_lock_irq(&newowner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &newowner->pi_state_list);
- spin_unlock_irq(&newowner->pi_lock);
+ raw_spin_unlock_irq(&newowner->pi_lock);
return 0;
/*
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 3e1c36e7998f..0086628b6e97 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
for (;;) {
base = timer->base;
if (likely(base != NULL)) {
- spin_lock_irqsave(&base->cpu_base->lock, *flags);
+ raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
if (likely(base == timer->base))
return base;
/* The timer has migrated to another CPU: */
- spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
+ raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
}
cpu_relax();
}
@@ -208,13 +208,13 @@ again:
/* See the comment in lock_timer_base() */
timer->base = NULL;
- spin_unlock(&base->cpu_base->lock);
- spin_lock(&new_base->cpu_base->lock);
+ raw_spin_unlock(&base->cpu_base->lock);
+ raw_spin_lock(&new_base->cpu_base->lock);
if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
cpu = this_cpu;
- spin_unlock(&new_base->cpu_base->lock);
- spin_lock(&base->cpu_base->lock);
+ raw_spin_unlock(&new_base->cpu_base->lock);
+ raw_spin_lock(&base->cpu_base->lock);
timer->base = base;
goto again;
}
@@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
{
struct hrtimer_clock_base *base = timer->base;
- spin_lock_irqsave(&base->cpu_base->lock, *flags);
+ raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
return base;
}
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
static int hrtimer_reprogram(struct hrtimer *timer,
struct hrtimer_clock_base *base)
{
- ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
int res;
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
if (expires.tv64 < 0)
return -ETIME;
- if (expires.tv64 >= expires_next->tv64)
+ if (expires.tv64 >= cpu_base->expires_next.tv64)
+ return 0;
+
+ /*
+ * If a hang was detected in the last timer interrupt then we
+ * do not schedule a timer which is earlier than the expiry
+ * which we enforced in the hang detection. We want the system
+ * to make progress.
+ */
+ if (cpu_base->hang_detected)
return 0;
/*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
*/
res = tick_program_event(expires, 0);
if (!IS_ERR_VALUE(res))
- *expires_next = expires;
+ cpu_base->expires_next = expires;
return res;
}
@@ -619,12 +628,12 @@ static void retrigger_next_event(void *arg)
base = &__get_cpu_var(hrtimer_bases);
/* Adjust CLOCK_REALTIME offset */
- spin_lock(&base->lock);
+ raw_spin_lock(&base->lock);
base->clock_base[CLOCK_REALTIME].offset =
timespec_to_ktime(realtime_offset);
hrtimer_force_reprogram(base, 0);
- spin_unlock(&base->lock);
+ raw_spin_unlock(&base->lock);
}
/*
@@ -685,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
{
if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
if (wakeup) {
- spin_unlock(&base->cpu_base->lock);
+ raw_spin_unlock(&base->cpu_base->lock);
raise_softirq_irqoff(HRTIMER_SOFTIRQ);
- spin_lock(&base->cpu_base->lock);
+ raw_spin_lock(&base->cpu_base->lock);
} else
__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
@@ -747,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
#endif /* CONFIG_HIGH_RES_TIMERS */
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
{
+#ifdef CONFIG_TIMER_STATS
if (timer->start_site)
return;
-
- timer->start_site = addr;
+ timer->start_site = __builtin_return_address(0);
memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
timer->start_pid = current->pid;
+#endif
}
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+ timer->start_site = NULL;
+#endif
+}
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+ if (likely(!timer_stats_active))
+ return;
+ timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+ timer->function, timer->start_comm, 0);
#endif
+}
/*
* Counterpart to lock_hrtimer_base above:
@@ -765,7 +790,7 @@ void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
static inline
void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
{
- spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
+ raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
}
/**
@@ -1098,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void)
unsigned long flags;
int i;
- spin_lock_irqsave(&cpu_base->lock, flags);
+ raw_spin_lock_irqsave(&cpu_base->lock, flags);
if (!hrtimer_hres_active()) {
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
@@ -1115,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void)
}
}
- spin_unlock_irqrestore(&cpu_base->lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
if (mindelta.tv64 < 0)
mindelta.tv64 = 0;
@@ -1197,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
* they get migrated to another cpu, therefore its safe to unlock
* the timer base.
*/
- spin_unlock(&cpu_base->lock);
+ raw_spin_unlock(&cpu_base->lock);
trace_hrtimer_expire_entry(timer, now);
restart = fn(timer);
trace_hrtimer_expire_exit(timer);
- spin_lock(&cpu_base->lock);
+ raw_spin_lock(&cpu_base->lock);
/*
* Note: We clear the CALLBACK bit after enqueue_hrtimer and
@@ -1217,29 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
#ifdef CONFIG_HIGH_RES_TIMERS
-static int force_clock_reprogram;
-
-/*
- * After 5 iteration's attempts, we consider that hrtimer_interrupt()
- * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
- * The next tick event will be scheduled to 3 times we currently spend on
- * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
- * 1/4 of their time to process the hrtimer interrupts. This is enough to
- * let it running without serious starvation.
- */
-
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
- ktime_t try_time)
-{
- force_clock_reprogram = 1;
- dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
- printk(KERN_WARNING "hrtimer: interrupt too slow, "
- "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
-}
/*
* High resolution timer interrupt
* Called with interrupts disabled
@@ -1248,24 +1250,18 @@ void hrtimer_interrupt(struct clock_event_device *dev)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
struct hrtimer_clock_base *base;
- ktime_t expires_next, now;
- int nr_retries = 0;
- int i;
+ ktime_t expires_next, now, entry_time, delta;
+ int i, retries = 0;
BUG_ON(!cpu_base->hres_active);
cpu_base->nr_events++;
dev->next_event.tv64 = KTIME_MAX;
- retry:
- /* 5 retries is enough to notice a hang */
- if (!(++nr_retries % 5))
- hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
-
- now = ktime_get();
-
+ entry_time = now = ktime_get();
+retry:
expires_next.tv64 = KTIME_MAX;
- spin_lock(&cpu_base->lock);
+ raw_spin_lock(&cpu_base->lock);
/*
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
@@ -1321,13 +1317,51 @@ void hrtimer_interrupt(struct clock_event_device *dev)
* against it.
*/
cpu_base->expires_next = expires_next;
- spin_unlock(&cpu_base->lock);
+ raw_spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
- if (expires_next.tv64 != KTIME_MAX) {
- if (tick_program_event(expires_next, force_clock_reprogram))
- goto retry;
+ if (expires_next.tv64 == KTIME_MAX ||
+ !tick_program_event(expires_next, 0)) {
+ cpu_base->hang_detected = 0;
+ return;
}
+
+ /*
+ * The next timer was already expired due to:
+ * - tracing
+ * - long lasting callbacks
+ * - being scheduled away when running in a VM
+ *
+ * We need to prevent that we loop forever in the hrtimer
+ * interrupt routine. We give it 3 attempts to avoid
+ * overreacting on some spurious event.
+ */
+ now = ktime_get();
+ cpu_base->nr_retries++;
+ if (++retries < 3)
+ goto retry;
+ /*
+ * Give the system a chance to do something else than looping
+ * here. We stored the entry time, so we know exactly how long
+ * we spent here. We schedule the next event this amount of
+ * time away.
+ */
+ cpu_base->nr_hangs++;
+ cpu_base->hang_detected = 1;
+ delta = ktime_sub(now, entry_time);
+ if (delta.tv64 > cpu_base->max_hang_time.tv64)
+ cpu_base->max_hang_time = delta;
+ /*
+ * Limit it to a sensible value as we enforce a longer
+ * delay. Give the CPU at least 100ms to catch up.
+ */
+ if (delta.tv64 > 100 * NSEC_PER_MSEC)
+ expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+ else
+ expires_next = ktime_add(now, delta);
+ tick_program_event(expires_next, 1);
+ printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+ ktime_to_ns(delta));
}
/*
@@ -1423,7 +1457,7 @@ void hrtimer_run_queues(void)
gettime = 0;
}
- spin_lock(&cpu_base->lock);
+ raw_spin_lock(&cpu_base->lock);
while ((node = base->first)) {
struct hrtimer *timer;
@@ -1435,7 +1469,7 @@ void hrtimer_run_queues(void)
__run_hrtimer(timer, &base->softirq_time);
}
- spin_unlock(&cpu_base->lock);
+ raw_spin_unlock(&cpu_base->lock);
}
}
@@ -1591,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
- spin_lock_init(&cpu_base->lock);
+ raw_spin_lock_init(&cpu_base->lock);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
cpu_base->clock_base[i].cpu_base = cpu_base;
@@ -1649,16 +1683,16 @@ static void migrate_hrtimers(int scpu)
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
- spin_lock(&new_base->lock);
- spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&new_base->lock);
+ raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
migrate_hrtimer_list(&old_base->clock_base[i],
&new_base->clock_base[i]);
}
- spin_unlock(&old_base->lock);
- spin_unlock(&new_base->lock);
+ raw_spin_unlock(&old_base->lock);
+ raw_spin_unlock(&new_base->lock);
/* Check, if we got expired work to do */
__hrtimer_peek_ahead_timers();
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index cf5ee1628411..dbcbf6a33a08 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -52,7 +52,7 @@
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
/* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
/* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex);
static unsigned int max_task_bp_pinned(int cpu)
{
int i;
- unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+ unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
for (i = HBP_NUM -1; i >= 0; i--) {
if (tsk_pinned[i] > 0)
@@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu)
return 0;
}
+static int task_bp_pinned(struct task_struct *tsk)
+{
+ struct perf_event_context *ctx = tsk->perf_event_ctxp;
+ struct list_head *list;
+ struct perf_event *bp;
+ unsigned long flags;
+ int count = 0;
+
+ if (WARN_ONCE(!ctx, "No perf context for this task"))
+ return 0;
+
+ list = &ctx->event_list;
+
+ raw_spin_lock_irqsave(&ctx->lock, flags);
+
+ /*
+ * The current breakpoint counter is not included in the list
+ * at the open() callback time
+ */
+ list_for_each_entry(bp, list, event_entry) {
+ if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+ count++;
+ }
+
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+ return count;
+}
+
/*
* Report the number of pinned/un-pinned breakpoints we have in
* a given cpu (cpu > -1) or in all of them (cpu = -1).
*/
-static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+static void
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
{
+ int cpu = bp->cpu;
+ struct task_struct *tsk = bp->ctx->task;
+
if (cpu >= 0) {
slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
- slots->pinned += max_task_bp_pinned(cpu);
+ if (!tsk)
+ slots->pinned += max_task_bp_pinned(cpu);
+ else
+ slots->pinned += task_bp_pinned(tsk);
slots->flexible = per_cpu(nr_bp_flexible, cpu);
return;
@@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
unsigned int nr;
nr = per_cpu(nr_cpu_bp_pinned, cpu);
- nr += max_task_bp_pinned(cpu);
+ if (!tsk)
+ nr += max_task_bp_pinned(cpu);
+ else
+ nr += task_bp_pinned(tsk);
if (nr > slots->pinned)
slots->pinned = nr;
@@ -118,35 +157,12 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
*/
static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
{
- int count = 0;
- struct perf_event *bp;
- struct perf_event_context *ctx = tsk->perf_event_ctxp;
unsigned int *tsk_pinned;
- struct list_head *list;
- unsigned long flags;
-
- if (WARN_ONCE(!ctx, "No perf context for this task"))
- return;
-
- list = &ctx->event_list;
-
- spin_lock_irqsave(&ctx->lock, flags);
-
- /*
- * The current breakpoint counter is not included in the list
- * at the open() callback time
- */
- list_for_each_entry(bp, list, event_entry) {
- if (bp->attr.type == PERF_TYPE_BREAKPOINT)
- count++;
- }
-
- spin_unlock_irqrestore(&ctx->lock, flags);
+ int count = 0;
- if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
- return;
+ count = task_bp_pinned(tsk);
- tsk_pinned = per_cpu(task_bp_pinned, cpu);
+ tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
if (enable) {
tsk_pinned[count]++;
if (count > 0)
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to a single cpu, check:
*
* (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
- * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ * + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
*
* -> If there are already non-pinned counters in this cpu, it means
* there is already a free slot for them.
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to every cpus, check:
*
* (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
- * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ * + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
*
* -> This is roughly the same, except we check the number of per cpu
* bp for every cpu and we keep the max one. Same for the per tasks
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to a single cpu, check:
*
* ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
- * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ * + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
*
* -> Same checks as before. But now the nr_bp_flexible, if any, must keep
* one register at least (or they will never be fed).
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to every cpus, check:
*
* ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
- * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
*/
int reserve_bp_slot(struct perf_event *bp)
{
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp)
mutex_lock(&nr_bp_mutex);
- fetch_bp_busy_slots(&slots, bp->cpu);
+ fetch_bp_busy_slots(&slots, bp);
/* Flexible counters need to keep at least one slot */
if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp)
}
-int __register_perf_hw_breakpoint(struct perf_event *bp)
+int register_perf_hw_breakpoint(struct perf_event *bp)
{
int ret;
@@ -276,19 +292,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp)
* This is a quick hack that will be removed soon, once we remove
* the tmp breakpoints from ptrace
*/
- if (!bp->attr.disabled || bp->callback == perf_bp_event)
+ if (!bp->attr.disabled || !bp->overflow_handler)
ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
return ret;
}
-int register_perf_hw_breakpoint(struct perf_event *bp)
-{
- bp->callback = perf_bp_event;
-
- return __register_perf_hw_breakpoint(bp);
-}
-
/**
* register_user_hw_breakpoint - register a hardware breakpoint for user space
* @attr: breakpoint attributes
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
*/
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
- perf_callback_t triggered,
+ perf_overflow_handler_t triggered,
struct task_struct *tsk)
{
return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
* @triggered: callback to trigger when we hit the breakpoint
* @tsk: pointer to 'task_struct' of the process to which the address belongs
*/
-struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
- perf_callback_t triggered,
- struct task_struct *tsk)
+int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
- /*
- * FIXME: do it without unregistering
- * - We don't want to lose our slot
- * - If the new bp is incorrect, don't lose the older one
- */
- unregister_hw_breakpoint(bp);
+ u64 old_addr = bp->attr.bp_addr;
+ int old_type = bp->attr.bp_type;
+ int old_len = bp->attr.bp_len;
+ int err = 0;
- return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+ perf_event_disable(bp);
+
+ bp->attr.bp_addr = attr->bp_addr;
+ bp->attr.bp_type = attr->bp_type;
+ bp->attr.bp_len = attr->bp_len;
+
+ if (attr->disabled)
+ goto end;
+
+ err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+ if (!err)
+ perf_event_enable(bp);
+
+ if (err) {
+ bp->attr.bp_addr = old_addr;
+ bp->attr.bp_type = old_type;
+ bp->attr.bp_len = old_len;
+ if (!bp->attr.disabled)
+ perf_event_enable(bp);
+
+ return err;
+ }
+
+end:
+ bp->attr.disabled = attr->disabled;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
*/
struct perf_event **
register_wide_hw_breakpoint(struct perf_event_attr *attr,
- perf_callback_t triggered)
+ perf_overflow_handler_t triggered)
{
struct perf_event **cpu_events, **pevent, *bp;
long err;
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 1de9700f416e..2295a31ef110 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -45,7 +45,7 @@ unsigned long probe_irq_on(void)
* flush such a longstanding irq before considering it as spurious.
*/
for_each_irq_desc_reverse(i, desc) {
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
/*
* An old-style architecture might still have
@@ -61,7 +61,7 @@ unsigned long probe_irq_on(void)
desc->chip->set_type(i, IRQ_TYPE_PROBE);
desc->chip->startup(i);
}
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
}
/* Wait for longstanding interrupts to trigger. */
@@ -73,13 +73,13 @@ unsigned long probe_irq_on(void)
* happened in the previous stage, it may have masked itself)
*/
for_each_irq_desc_reverse(i, desc) {
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
if (desc->chip->startup(i))
desc->status |= IRQ_PENDING;
}
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
}
/*
@@ -91,7 +91,7 @@ unsigned long probe_irq_on(void)
* Now filter out any obviously spurious interrupts
*/
for_each_irq_desc(i, desc) {
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
@@ -103,7 +103,7 @@ unsigned long probe_irq_on(void)
if (i < 32)
mask |= 1 << i;
}
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
}
return mask;
@@ -129,7 +129,7 @@ unsigned int probe_irq_mask(unsigned long val)
int i;
for_each_irq_desc(i, desc) {
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
@@ -139,7 +139,7 @@ unsigned int probe_irq_mask(unsigned long val)
desc->status = status & ~IRQ_AUTODETECT;
desc->chip->shutdown(i);
}
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
}
mutex_unlock(&probing_active);
@@ -171,7 +171,7 @@ int probe_irq_off(unsigned long val)
unsigned int status;
for_each_irq_desc(i, desc) {
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
@@ -183,7 +183,7 @@ int probe_irq_off(unsigned long val)
desc->status = status & ~IRQ_AUTODETECT;
desc->chip->shutdown(i);
}
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
}
mutex_unlock(&probing_active);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ba566c261adc..ecc3fa28f666 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -34,7 +34,7 @@ void dynamic_irq_init(unsigned int irq)
}
/* Ensure we don't have left over values from a previous use of this irq */
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
desc->status = IRQ_DISABLED;
desc->chip = &no_irq_chip;
desc->handle_irq = handle_bad_irq;
@@ -51,7 +51,7 @@ void dynamic_irq_init(unsigned int irq)
cpumask_clear(desc->pending_mask);
#endif
#endif
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
/**
@@ -68,9 +68,9 @@ void dynamic_irq_cleanup(unsigned int irq)
return;
}
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
if (desc->action) {
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n",
irq);
return;
@@ -82,7 +82,7 @@ void dynamic_irq_cleanup(unsigned int irq)
desc->chip = &no_irq_chip;
desc->name = NULL;
clear_kstat_irqs(desc);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
@@ -104,10 +104,10 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
if (!chip)
chip = &no_irq_chip;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
irq_chip_set_defaults(chip);
desc->chip = chip;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -133,9 +133,9 @@ int set_irq_type(unsigned int irq, unsigned int type)
if (type == IRQ_TYPE_NONE)
return 0;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
ret = __irq_set_trigger(desc, irq, type);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
EXPORT_SYMBOL(set_irq_type);
@@ -158,9 +158,9 @@ int set_irq_data(unsigned int irq, void *data)
return -EINVAL;
}
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
desc->handler_data = data;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
EXPORT_SYMBOL(set_irq_data);
@@ -183,11 +183,11 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
return -EINVAL;
}
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
desc->msi_desc = entry;
if (entry)
entry->irq = irq;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -214,9 +214,9 @@ int set_irq_chip_data(unsigned int irq, void *data)
return -EINVAL;
}
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
desc->chip_data = data;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -241,12 +241,12 @@ void set_irq_nested_thread(unsigned int irq, int nest)
if (!desc)
return;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
if (nest)
desc->status |= IRQ_NESTED_THREAD;
else
desc->status &= ~IRQ_NESTED_THREAD;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
EXPORT_SYMBOL_GPL(set_irq_nested_thread);
@@ -343,7 +343,7 @@ void handle_nested_irq(unsigned int irq)
might_sleep();
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
kstat_incr_irqs_this_cpu(irq, desc);
@@ -352,17 +352,17 @@ void handle_nested_irq(unsigned int irq)
goto out_unlock;
desc->status |= IRQ_INPROGRESS;
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
action_ret = action->thread_fn(action->irq, action->dev_id);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
out_unlock:
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_nested_irq);
@@ -384,7 +384,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
struct irqaction *action;
irqreturn_t action_ret;
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out_unlock;
@@ -396,16 +396,16 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
goto out_unlock;
desc->status |= IRQ_INPROGRESS;
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
out_unlock:
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
}
/**
@@ -424,7 +424,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
struct irqaction *action;
irqreturn_t action_ret;
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
mask_ack_irq(desc, irq);
if (unlikely(desc->status & IRQ_INPROGRESS))
@@ -441,13 +441,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
goto out_unlock;
desc->status |= IRQ_INPROGRESS;
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
if (unlikely(desc->status & IRQ_ONESHOT))
@@ -455,7 +455,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
desc->chip->unmask(irq);
out_unlock:
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_level_irq);
@@ -475,7 +475,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
struct irqaction *action;
irqreturn_t action_ret;
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out;
@@ -497,18 +497,18 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
desc->status |= IRQ_INPROGRESS;
desc->status &= ~IRQ_PENDING;
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
out:
desc->chip->eoi(irq);
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
}
/**
@@ -530,7 +530,7 @@ out:
void
handle_edge_irq(unsigned int irq, struct irq_desc *desc)
{
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
@@ -576,17 +576,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
}
desc->status &= ~IRQ_PENDING;
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
desc->status &= ~IRQ_INPROGRESS;
out_unlock:
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
}
/**
@@ -643,7 +643,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
}
chip_bus_lock(irq, desc);
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
/* Uninstall? */
if (handle == handle_bad_irq) {
@@ -661,7 +661,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
desc->depth = 0;
desc->chip->startup(irq);
}
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
chip_bus_sync_unlock(irq, desc);
}
EXPORT_SYMBOL_GPL(__set_irq_handler);
@@ -692,9 +692,9 @@ void __init set_irq_noprobe(unsigned int irq)
return;
}
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
desc->status |= IRQ_NOPROBE;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
void __init set_irq_probe(unsigned int irq)
@@ -707,7 +707,7 @@ void __init set_irq_probe(unsigned int irq)
return;
}
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
desc->status &= ~IRQ_NOPROBE;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 17c71bb565c6..814940e7f485 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -80,7 +80,7 @@ static struct irq_desc irq_desc_init = {
.chip = &no_irq_chip,
.handle_irq = handle_bad_irq,
.depth = 1,
- .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
};
void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
@@ -108,7 +108,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
{
memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
- spin_lock_init(&desc->lock);
+ raw_spin_lock_init(&desc->lock);
desc->irq = irq;
#ifdef CONFIG_SMP
desc->node = node;
@@ -130,7 +130,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
/*
* Protect the sparse_irqs:
*/
-DEFINE_SPINLOCK(sparse_irq_lock);
+DEFINE_RAW_SPINLOCK(sparse_irq_lock);
struct irq_desc **irq_desc_ptrs __read_mostly;
@@ -141,7 +141,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
.chip = &no_irq_chip,
.handle_irq = handle_bad_irq,
.depth = 1,
- .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
}
};
@@ -212,7 +212,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
if (desc)
return desc;
- spin_lock_irqsave(&sparse_irq_lock, flags);
+ raw_spin_lock_irqsave(&sparse_irq_lock, flags);
/* We have to check it to avoid races with another CPU */
desc = irq_desc_ptrs[irq];
@@ -234,7 +234,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
irq_desc_ptrs[irq] = desc;
out_unlock:
- spin_unlock_irqrestore(&sparse_irq_lock, flags);
+ raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
return desc;
}
@@ -247,7 +247,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
.chip = &no_irq_chip,
.handle_irq = handle_bad_irq,
.depth = 1,
- .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
}
};
@@ -473,7 +473,7 @@ unsigned int __do_IRQ(unsigned int irq)
return 1;
}
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
if (desc->chip->ack)
desc->chip->ack(irq);
/*
@@ -517,13 +517,13 @@ unsigned int __do_IRQ(unsigned int irq)
for (;;) {
irqreturn_t action_ret;
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
@@ -536,7 +536,7 @@ out:
* disabled while the handler was running.
*/
desc->chip->end(irq);
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
return 1;
}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 1b5d742c6a77..b2821f070a3d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -18,7 +18,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
extern struct lock_class_key irq_desc_lock_class;
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
extern void clear_kstat_irqs(struct irq_desc *desc);
-extern spinlock_t sparse_irq_lock;
+extern raw_spinlock_t sparse_irq_lock;
#ifdef CONFIG_SPARSE_IRQ
/* irq_desc_ptrs allocated at boot time */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index bde4c667d24d..eb6078ca60c7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -46,9 +46,9 @@ void synchronize_irq(unsigned int irq)
cpu_relax();
/* Ok, that indicated we're done: double-check carefully. */
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
status = desc->status;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
/* Oops, that failed? */
} while (status & IRQ_INPROGRESS);
@@ -114,7 +114,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
if (!desc->chip->set_affinity)
return -EINVAL;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PCNTXT) {
@@ -134,7 +134,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
}
#endif
desc->status |= IRQ_AFFINITY_SET;
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -181,11 +181,11 @@ int irq_select_affinity_usr(unsigned int irq)
unsigned long flags;
int ret;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
ret = setup_affinity(irq, desc);
if (!ret)
irq_set_thread_affinity(desc);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
@@ -231,9 +231,9 @@ void disable_irq_nosync(unsigned int irq)
return;
chip_bus_lock(irq, desc);
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
__disable_irq(desc, irq, false);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
chip_bus_sync_unlock(irq, desc);
}
EXPORT_SYMBOL(disable_irq_nosync);
@@ -308,9 +308,9 @@ void enable_irq(unsigned int irq)
return;
chip_bus_lock(irq, desc);
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, false);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
chip_bus_sync_unlock(irq, desc);
}
EXPORT_SYMBOL(enable_irq);
@@ -347,7 +347,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
/* wakeup-capable irqs can be shared between drivers that
* don't need to have the same sleep mode behaviors.
*/
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
if (on) {
if (desc->wake_depth++ == 0) {
ret = set_irq_wake_real(irq, on);
@@ -368,7 +368,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
}
}
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
EXPORT_SYMBOL(set_irq_wake);
@@ -484,12 +484,12 @@ static int irq_wait_for_interrupt(struct irqaction *action)
static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
{
chip_bus_lock(irq, desc);
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
desc->status &= ~IRQ_MASKED;
desc->chip->unmask(irq);
}
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
chip_bus_sync_unlock(irq, desc);
}
@@ -514,9 +514,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
return;
}
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
cpumask_copy(mask, desc->affinity);
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
set_cpus_allowed_ptr(current, mask);
free_cpumask_var(mask);
@@ -545,7 +545,7 @@ static int irq_thread(void *data)
atomic_inc(&desc->threads_active);
- spin_lock_irq(&desc->lock);
+ raw_spin_lock_irq(&desc->lock);
if (unlikely(desc->status & IRQ_DISABLED)) {
/*
* CHECKME: We might need a dedicated
@@ -555,9 +555,9 @@ static int irq_thread(void *data)
* retriggers the interrupt itself --- tglx
*/
desc->status |= IRQ_PENDING;
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
} else {
- spin_unlock_irq(&desc->lock);
+ raw_spin_unlock_irq(&desc->lock);
action->thread_fn(action->irq, action->dev_id);
@@ -679,7 +679,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
/*
* The following block of code has to be executed atomically
*/
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
old_ptr = &desc->action;
old = *old_ptr;
if (old) {
@@ -775,7 +775,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
__enable_irq(desc, irq, false);
}
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
/*
* Strictly no need to wake it up, but hung_task complains
@@ -802,7 +802,7 @@ mismatch:
ret = -EBUSY;
out_thread:
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
if (new->thread) {
struct task_struct *t = new->thread;
@@ -844,7 +844,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!desc)
return NULL;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
/*
* There can be multiple actions per IRQ descriptor, find the right
@@ -856,7 +856,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!action) {
WARN(1, "Trying to free already-free IRQ %d\n", irq);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return NULL;
}
@@ -884,7 +884,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
desc->chip->disable(irq);
}
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -1067,7 +1067,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
kfree(action);
#ifdef CONFIG_DEBUG_SHIRQ
- if (irqflags & IRQF_SHARED) {
+ if (!retval && (irqflags & IRQF_SHARED)) {
/*
* It's a shared IRQ -- the driver ought to be prepared for it
* to happen immediately, so let's make sure....
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index fcb6c96f2627..241962280836 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -27,7 +27,7 @@ void move_masked_irq(int irq)
if (!desc->chip->set_affinity)
return;
- assert_spin_locked(&desc->lock);
+ assert_raw_spin_locked(&desc->lock);
/*
* If there was a valid mask to work with, please
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 3fd30197da2e..26bac9d8f860 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -42,7 +42,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
"for migration.\n", irq);
return false;
}
- spin_lock_init(&desc->lock);
+ raw_spin_lock_init(&desc->lock);
desc->node = node;
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
@@ -67,7 +67,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
irq = old_desc->irq;
- spin_lock_irqsave(&sparse_irq_lock, flags);
+ raw_spin_lock_irqsave(&sparse_irq_lock, flags);
/* We have to check it to avoid races with another CPU */
desc = irq_desc_ptrs[irq];
@@ -91,7 +91,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
}
irq_desc_ptrs[irq] = desc;
- spin_unlock_irqrestore(&sparse_irq_lock, flags);
+ raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
/* free the old one */
free_one_irq_desc(old_desc, desc);
@@ -100,7 +100,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
return desc;
out_unlock:
- spin_unlock_irqrestore(&sparse_irq_lock, flags);
+ raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
return desc;
}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index a0bb09e79867..0d4005d85b03 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -28,9 +28,9 @@ void suspend_device_irqs(void)
for_each_irq_desc(irq, desc) {
unsigned long flags;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
__disable_irq(desc, irq, true);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
for_each_irq_desc(irq, desc)
@@ -56,9 +56,9 @@ void resume_device_irqs(void)
if (!(desc->status & IRQ_SUSPENDED))
continue;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, true);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
}
EXPORT_SYMBOL_GPL(resume_device_irqs);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0832145fea97..6f50eccc79c0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -179,7 +179,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
unsigned long flags;
int ret = 1;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
for (action = desc->action ; action; action = action->next) {
if ((action != new_action) && action->name &&
!strcmp(new_action->name, action->name)) {
@@ -187,7 +187,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
break;
}
}
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 22b0a6eedf24..89fb90ae534f 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -28,7 +28,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
struct irqaction *action;
int ok = 0, work = 0;
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
/* Already running on another processor */
if (desc->status & IRQ_INPROGRESS) {
/*
@@ -37,13 +37,13 @@ static int try_one_irq(int irq, struct irq_desc *desc)
*/
if (desc->action && (desc->action->flags & IRQF_SHARED))
desc->status |= IRQ_PENDING;
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
return ok;
}
/* Honour the normal IRQ locking */
desc->status |= IRQ_INPROGRESS;
action = desc->action;
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
while (action) {
/* Only shared IRQ handlers are safe to call */
@@ -56,7 +56,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
}
local_irq_disable();
/* Now clean up the flags */
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
action = desc->action;
/*
@@ -68,9 +68,9 @@ static int try_one_irq(int irq, struct irq_desc *desc)
* Perform real IRQ processing for the IRQ we deferred
*/
work = 1;
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
handle_IRQ_event(irq, action);
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_PENDING;
}
desc->status &= ~IRQ_INPROGRESS;
@@ -80,7 +80,7 @@ static int try_one_irq(int irq, struct irq_desc *desc)
*/
if (work && desc->chip && desc->chip->end)
desc->chip->end(irq);
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
return ok;
}
@@ -220,7 +220,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
/*
* If we are seeing only the odd spurious IRQ caused by
* bus asynchronicity then don't eventually trigger an error,
- * otherwise the couter becomes a doomsday timer for otherwise
+ * otherwise the counter becomes a doomsday timer for otherwise
* working systems
*/
if (time_after(jiffies, desc->last_unhandled + HZ/10))
diff --git a/kernel/itimer.c b/kernel/itimer.c
index b03451ede528..d802883153da 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -146,6 +146,7 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
{
cputime_t cval, nval, cinterval, ninterval;
s64 ns_ninterval, ns_nval;
+ u32 error, incr_error;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
nval = timeval_to_cputime(&value->it_value);
@@ -153,8 +154,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
ninterval = timeval_to_cputime(&value->it_interval);
ns_ninterval = timeval_to_ns(&value->it_interval);
- it->incr_error = cputime_sub_ns(ninterval, ns_ninterval);
- it->error = cputime_sub_ns(nval, ns_nval);
+ error = cputime_sub_ns(nval, ns_nval);
+ incr_error = cputime_sub_ns(ninterval, ns_ninterval);
spin_lock_irq(&tsk->sighand->siglock);
@@ -168,6 +169,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
}
it->expires = nval;
it->incr = ninterval;
+ it->error = error;
+ it->incr_error = incr_error;
trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f336e2107f98..a9a93d9ee7a7 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -21,7 +21,7 @@
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
#include <linux/utsname.h>
#include <linux/numa.h>
#include <linux/suspend.h>
@@ -31,6 +31,7 @@
#include <linux/cpu.h>
#include <linux/console.h>
#include <linux/vmalloc.h>
+#include <linux/swap.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -1082,6 +1083,64 @@ void crash_kexec(struct pt_regs *regs)
}
}
+size_t crash_get_memory_size(void) /* Return the byte length of the crashk_res range. */
+{
+ size_t size;
+ mutex_lock(&kexec_mutex); /* read both bounds of crashk_res under kexec_mutex */
+ size = crashk_res.end - crashk_res.start + 1; /* +1: .end is treated as the last byte; NOTE(review): yields 1, not 0, when start == end - confirm that case cannot occur */
+ mutex_unlock(&kexec_mutex);
+ return size;
+}
+
+static void free_reserved_phys_range(unsigned long begin, unsigned long end) /* Free every page-sized step of the physical range from begin while addr < end. */
+{
+ unsigned long addr;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) { /* end itself is excluded; the caller in this patch passes crashk_res.end */
+ ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); /* physical address -> page frame number -> struct page */
+ init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
+ free_page((unsigned long)__va(addr)); /* free_page() is given the __va() mapping of the physical address */
+ totalram_pages++; /* account one more page per iteration */
+ }
+}
+
+int crash_shrink_memory(unsigned long new_size) /* Shrink the crashk_res range to new_size bytes and free the tail; returns 0, -ENOENT or -EINVAL. */
+{
+ int ret = 0;
+ unsigned long start, end;
+
+ mutex_lock(&kexec_mutex); /* held until the unlock label; every exit path goes through it */
+
+ if (kexec_crash_image) { /* refuse with -ENOENT while kexec_crash_image is set */
+ ret = -ENOENT;
+ goto unlock;
+ }
+ start = crashk_res.start;
+ end = crashk_res.end;
+
+ if (new_size >= end - start + 1) { /* not a shrink: compare against the current inclusive size */
+ ret = -EINVAL; /* a larger size is rejected ... */
+ if (new_size == end - start + 1)
+ ret = 0; /* ... but an unchanged size is a successful no-op */
+ goto unlock;
+ }
+
+ start = roundup(start, PAGE_SIZE);
+ end = roundup(start + new_size, PAGE_SIZE); /* first page-aligned address past the part that is kept */
+
+ free_reserved_phys_range(end, crashk_res.end); /* release from the new end up to the old .end */
+
+ if (start == end) { /* nothing is kept, i.e. new_size == 0 */
+ crashk_res.end = end;
+ release_resource(&crashk_res); /* NOTE(review): assumes crashk_res is currently registered - confirm */
+ } else
+ crashk_res.end = end - 1; /* .end is stored as the last byte of the kept range */
+
+unlock:
+ mutex_unlock(&kexec_mutex);
+ return ret;
+}
+
static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
size_t data_len)
{
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 7d7014634022..2eb517e23514 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -129,6 +129,7 @@ struct task_struct *kgdb_usethread;
struct task_struct *kgdb_contthread;
int kgdb_single_step;
+pid_t kgdb_sstep_pid;
/* Our I/O buffers. */
static char remcom_in_buffer[BUFMAX];
@@ -541,12 +542,17 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid)
*/
if (tid == 0 || tid == -1)
tid = -atomic_read(&kgdb_active) - 2;
- if (tid < 0) {
+ if (tid < -1 && tid > -NR_CPUS - 2) {
if (kgdb_info[-tid - 2].task)
return kgdb_info[-tid - 2].task;
else
return idle_task(-tid - 2);
}
+ if (tid <= 0) {
+ printk(KERN_ERR "KGDB: Internal thread select error\n");
+ dump_stack();
+ return NULL;
+ }
/*
* find_task_by_pid_ns() does not take the tasklist lock anymore
@@ -619,7 +625,8 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
static int kgdb_activate_sw_breakpoints(void)
{
unsigned long addr;
- int error = 0;
+ int error;
+ int ret = 0;
int i;
for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -629,13 +636,16 @@ static int kgdb_activate_sw_breakpoints(void)
addr = kgdb_break[i].bpt_addr;
error = kgdb_arch_set_breakpoint(addr,
kgdb_break[i].saved_instr);
- if (error)
- return error;
+ if (error) {
+ ret = error;
+ printk(KERN_INFO "KGDB: BP install failed: %lx", addr);
+ continue;
+ }
kgdb_flush_swbreak_addr(addr);
kgdb_break[i].state = BP_ACTIVE;
}
- return 0;
+ return ret;
}
static int kgdb_set_sw_break(unsigned long addr)
@@ -682,7 +692,8 @@ static int kgdb_set_sw_break(unsigned long addr)
static int kgdb_deactivate_sw_breakpoints(void)
{
unsigned long addr;
- int error = 0;
+ int error;
+ int ret = 0;
int i;
for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -691,13 +702,15 @@ static int kgdb_deactivate_sw_breakpoints(void)
addr = kgdb_break[i].bpt_addr;
error = kgdb_arch_remove_breakpoint(addr,
kgdb_break[i].saved_instr);
- if (error)
- return error;
+ if (error) {
+ printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
+ ret = error;
+ }
kgdb_flush_swbreak_addr(addr);
kgdb_break[i].state = BP_SET;
}
- return 0;
+ return ret;
}
static int kgdb_remove_sw_break(unsigned long addr)
@@ -1204,8 +1217,10 @@ static int gdb_cmd_exception_pass(struct kgdb_state *ks)
return 1;
} else {
- error_packet(remcom_out_buffer, -EINVAL);
- return 0;
+ kgdb_msg_write("KGDB only knows signal 9 (pass)"
+ " and 15 (pass and disconnect)\n"
+ "Executing a continue without signal passing\n", 0);
+ remcom_in_buffer[0] = 'c';
}
/* Indicate fall through */
@@ -1395,6 +1410,7 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
struct kgdb_state kgdb_var;
struct kgdb_state *ks = &kgdb_var;
unsigned long flags;
+ int sstep_tries = 100;
int error = 0;
int i, cpu;
@@ -1425,13 +1441,14 @@ acquirelock:
cpu_relax();
/*
- * Do not start the debugger connection on this CPU if the last
- * instance of the exception handler wanted to come into the
- * debugger on a different CPU via a single step
+ * For single stepping, try to only enter on the processor
+ * that was single stepping. To guard against a deadlock, the
+ * kernel will only try for the value of sstep_tries before
+ * giving up and continuing on.
*/
if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
- atomic_read(&kgdb_cpu_doing_single_step) != cpu) {
-
+ (kgdb_info[cpu].task &&
+ kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
atomic_set(&kgdb_active, -1);
touch_softlockup_watchdog();
clocksource_touch_watchdog();
@@ -1524,6 +1541,13 @@ acquirelock:
}
kgdb_restore:
+ if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
+ int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
+ if (kgdb_info[sstep_cpu].task)
+ kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
+ else
+ kgdb_sstep_pid = 0;
+ }
/* Free kgdb_active */
atomic_set(&kgdb_active, -1);
touch_softlockup_watchdog();
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 528dd78e7e7e..3feaf5a74514 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -100,6 +100,26 @@ static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
}
KERNEL_ATTR_RO(kexec_crash_loaded);
+static ssize_t kexec_crash_size_show(struct kobject *kobj, /* show handler: format the crash memory size into buf */
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%zu\n", crash_get_memory_size()); /* decimal size plus newline; the sprintf() count is the return value */
+}
+static ssize_t kexec_crash_size_store(struct kobject *kobj, /* store handler: parse a size from buf and shrink the crash memory to it */
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long cnt;
+ int ret;
+
+ if (strict_strtoul(buf, 0, &cnt)) /* base argument 0; any nonzero result is reported as -EINVAL */
+ return -EINVAL;
+
+ ret = crash_shrink_memory(cnt);
+ return ret < 0 ? ret : count; /* pass the error through, otherwise report the whole write as consumed */
+}
+KERNEL_ATTR_RW(kexec_crash_size);
+
static ssize_t vmcoreinfo_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -147,6 +167,7 @@ static struct attribute * kernel_attrs[] = {
#ifdef CONFIG_KEXEC
&kexec_loaded_attr.attr,
&kexec_crash_loaded_attr.attr,
+ &kexec_crash_size_attr.attr,
&vmcoreinfo_attr.attr,
#endif
NULL
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 4f8df01dbe51..5feaddcdbe49 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -73,11 +73,11 @@ module_param(lock_stat, int, 0644);
* to use a raw spinlock - we really dont want the spinlock
* code to recurse back into the lockdep code...
*/
-static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static int graph_lock(void)
{
- __raw_spin_lock(&lockdep_lock);
+ arch_spin_lock(&lockdep_lock);
/*
* Make sure that if another CPU detected a bug while
* walking the graph we dont change it (while the other
@@ -85,7 +85,7 @@ static int graph_lock(void)
* dropped already)
*/
if (!debug_locks) {
- __raw_spin_unlock(&lockdep_lock);
+ arch_spin_unlock(&lockdep_lock);
return 0;
}
/* prevent any recursions within lockdep from causing deadlocks */
@@ -95,11 +95,11 @@ static int graph_lock(void)
static inline int graph_unlock(void)
{
- if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
+ if (debug_locks && !arch_spin_is_locked(&lockdep_lock))
return DEBUG_LOCKS_WARN_ON(1);
current->lockdep_recursion--;
- __raw_spin_unlock(&lockdep_lock);
+ arch_spin_unlock(&lockdep_lock);
return 0;
}
@@ -111,7 +111,7 @@ static inline int debug_locks_off_graph_unlock(void)
{
int ret = debug_locks_off();
- __raw_spin_unlock(&lockdep_lock);
+ arch_spin_unlock(&lockdep_lock);
return ret;
}
@@ -140,7 +140,8 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
}
#ifdef CONFIG_LOCK_STAT
-static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
+static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
+ cpu_lock_stats);
static inline u64 lockstat_clock(void)
{
@@ -198,7 +199,7 @@ struct lock_class_stats lock_stats(struct lock_class *class)
memset(&stats, 0, sizeof(struct lock_class_stats));
for_each_possible_cpu(cpu) {
struct lock_class_stats *pcs =
- &per_cpu(lock_stats, cpu)[class - lock_classes];
+ &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
stats.contention_point[i] += pcs->contention_point[i];
@@ -225,7 +226,7 @@ void clear_lock_stats(struct lock_class *class)
for_each_possible_cpu(cpu) {
struct lock_class_stats *cpu_stats =
- &per_cpu(lock_stats, cpu)[class - lock_classes];
+ &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
memset(cpu_stats, 0, sizeof(struct lock_class_stats));
}
@@ -235,12 +236,12 @@ void clear_lock_stats(struct lock_class *class)
static struct lock_class_stats *get_lock_stats(struct lock_class *class)
{
- return &get_cpu_var(lock_stats)[class - lock_classes];
+ return &get_cpu_var(cpu_lock_stats)[class - lock_classes];
}
static void put_lock_stats(struct lock_class_stats *stats)
{
- put_cpu_var(lock_stats);
+ put_cpu_var(cpu_lock_stats);
}
static void lock_release_holdtime(struct held_lock *hlock)
@@ -1169,9 +1170,9 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
this.class = class;
local_irq_save(flags);
- __raw_spin_lock(&lockdep_lock);
+ arch_spin_lock(&lockdep_lock);
ret = __lockdep_count_forward_deps(&this);
- __raw_spin_unlock(&lockdep_lock);
+ arch_spin_unlock(&lockdep_lock);
local_irq_restore(flags);
return ret;
@@ -1196,9 +1197,9 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
this.class = class;
local_irq_save(flags);
- __raw_spin_lock(&lockdep_lock);
+ arch_spin_lock(&lockdep_lock);
ret = __lockdep_count_backward_deps(&this);
- __raw_spin_unlock(&lockdep_lock);
+ arch_spin_unlock(&lockdep_lock);
local_irq_restore(flags);
return ret;
diff --git a/kernel/module.c b/kernel/module.c
index 5842a71cf052..e96b8ed1cb6a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -370,8 +370,6 @@ EXPORT_SYMBOL_GPL(find_module);
#ifdef CONFIG_SMP
-#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
-
static void *percpu_modalloc(unsigned long size, unsigned long align,
const char *name)
{
@@ -395,154 +393,6 @@ static void percpu_modfree(void *freeme)
free_percpu(freeme);
}
-#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
-
-/* Number of blocks used and allocated. */
-static unsigned int pcpu_num_used, pcpu_num_allocated;
-/* Size of each block. -ve means used. */
-static int *pcpu_size;
-
-static int split_block(unsigned int i, unsigned short size)
-{
- /* Reallocation required? */
- if (pcpu_num_used + 1 > pcpu_num_allocated) {
- int *new;
-
- new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
- GFP_KERNEL);
- if (!new)
- return 0;
-
- pcpu_num_allocated *= 2;
- pcpu_size = new;
- }
-
- /* Insert a new subblock */
- memmove(&pcpu_size[i+1], &pcpu_size[i],
- sizeof(pcpu_size[0]) * (pcpu_num_used - i));
- pcpu_num_used++;
-
- pcpu_size[i+1] -= size;
- pcpu_size[i] = size;
- return 1;
-}
-
-static inline unsigned int block_size(int val)
-{
- if (val < 0)
- return -val;
- return val;
-}
-
-static void *percpu_modalloc(unsigned long size, unsigned long align,
- const char *name)
-{
- unsigned long extra;
- unsigned int i;
- void *ptr;
- int cpu;
-
- if (align > PAGE_SIZE) {
- printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
- name, align, PAGE_SIZE);
- align = PAGE_SIZE;
- }
-
- ptr = __per_cpu_start;
- for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
- /* Extra for alignment requirement. */
- extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
- BUG_ON(i == 0 && extra != 0);
-
- if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
- continue;
-
- /* Transfer extra to previous block. */
- if (pcpu_size[i-1] < 0)
- pcpu_size[i-1] -= extra;
- else
- pcpu_size[i-1] += extra;
- pcpu_size[i] -= extra;
- ptr += extra;
-
- /* Split block if warranted */
- if (pcpu_size[i] - size > sizeof(unsigned long))
- if (!split_block(i, size))
- return NULL;
-
- /* add the per-cpu scanning areas */
- for_each_possible_cpu(cpu)
- kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
- GFP_KERNEL);
-
- /* Mark allocated */
- pcpu_size[i] = -pcpu_size[i];
- return ptr;
- }
-
- printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
- size);
- return NULL;
-}
-
-static void percpu_modfree(void *freeme)
-{
- unsigned int i;
- void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
- int cpu;
-
- /* First entry is core kernel percpu data. */
- for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
- if (ptr == freeme) {
- pcpu_size[i] = -pcpu_size[i];
- goto free;
- }
- }
- BUG();
-
- free:
- /* remove the per-cpu scanning areas */
- for_each_possible_cpu(cpu)
- kmemleak_free(freeme + per_cpu_offset(cpu));
-
- /* Merge with previous? */
- if (pcpu_size[i-1] >= 0) {
- pcpu_size[i-1] += pcpu_size[i];
- pcpu_num_used--;
- memmove(&pcpu_size[i], &pcpu_size[i+1],
- (pcpu_num_used - i) * sizeof(pcpu_size[0]));
- i--;
- }
- /* Merge with next? */
- if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
- pcpu_size[i] += pcpu_size[i+1];
- pcpu_num_used--;
- memmove(&pcpu_size[i+1], &pcpu_size[i+2],
- (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
- }
-}
-
-static int percpu_modinit(void)
-{
- pcpu_num_used = 2;
- pcpu_num_allocated = 2;
- pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
- GFP_KERNEL);
- /* Static in-kernel percpu data (used). */
- pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
- /* Free room. */
- pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
- if (pcpu_size[1] < 0) {
- printk(KERN_ERR "No per-cpu room for modules.\n");
- pcpu_num_used = 1;
- }
-
- return 0;
-}
-__initcall(percpu_modinit);
-
-#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
-
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
const char *secstrings)
@@ -1030,11 +880,23 @@ static int try_to_force_load(struct module *mod, const char *reason)
}
#ifdef CONFIG_MODVERSIONS
+/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply it. */
+static unsigned long maybe_relocated(unsigned long crc, /* Return crc, adjusted only when ARCH_RELOCATES_KCRCTAB is defined and crc_owner is NULL. */
+ const struct module *crc_owner)
+{
+#ifdef ARCH_RELOCATES_KCRCTAB
+ if (crc_owner == NULL) /* presumably a NULL owner means the CRC comes from the core kernel rather than a module - confirm */
+ return crc - (unsigned long)reloc_start; /* undo the relocation by subtracting reloc_start */
+#endif
+ return crc; /* otherwise the value is returned unchanged */
+}
+
static int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
struct module *mod,
- const unsigned long *crc)
+ const unsigned long *crc,
+ const struct module *crc_owner)
{
unsigned int i, num_versions;
struct modversion_info *versions;
@@ -1055,10 +917,10 @@ static int check_version(Elf_Shdr *sechdrs,
if (strcmp(versions[i].name, symname) != 0)
continue;
- if (versions[i].crc == *crc)
+ if (versions[i].crc == maybe_relocated(*crc, crc_owner))
return 1;
DEBUGP("Found checksum %lX vs module %lX\n",
- *crc, versions[i].crc);
+ maybe_relocated(*crc, crc_owner), versions[i].crc);
goto bad_version;
}
@@ -1081,7 +943,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
&crc, true, false))
BUG();
- return check_version(sechdrs, versindex, "module_layout", mod, crc);
+ return check_version(sechdrs, versindex, "module_layout", mod, crc,
+ NULL);
}
/* First part is kernel version, which we ignore if module has crcs. */
@@ -1099,7 +962,8 @@ static inline int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
struct module *mod,
- const unsigned long *crc)
+ const unsigned long *crc,
+ const struct module *crc_owner)
{
return 1;
}
@@ -1134,8 +998,8 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
/* use_module can fail due to OOM,
or module initialization or unloading */
if (sym) {
- if (!check_version(sechdrs, versindex, name, mod, crc) ||
- !use_module(mod, owner))
+ if (!check_version(sechdrs, versindex, name, mod, crc, owner)
+ || !use_module(mod, owner))
sym = NULL;
}
return sym;
@@ -2046,9 +1910,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
unsigned int i;
/* only scan the sections containing data */
- kmemleak_scan_area(mod->module_core, (unsigned long)mod -
- (unsigned long)mod->module_core,
- sizeof(struct module), GFP_KERNEL);
+ kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
for (i = 1; i < hdr->e_shnum; i++) {
if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -2057,8 +1919,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
&& strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
continue;
- kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr -
- (unsigned long)mod->module_core,
+ kmemleak_scan_area((void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size, GFP_KERNEL);
}
}
@@ -2386,6 +2247,12 @@ static noinline struct module *load_module(void __user *umod,
"_ftrace_events",
sizeof(*mod->trace_events),
&mod->num_trace_events);
+ /*
+ * This section contains pointers to allocated objects in the trace
+ * code and not scanning it leads to false positives.
+ */
+ kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
+ mod->num_trace_events, GFP_KERNEL);
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
/* sechdrs[0].sh_size is always zero */
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 6b2d735846a5..57d527a16f9d 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -43,13 +43,13 @@ static inline void mutex_clear_owner(struct mutex *lock)
\
DEBUG_LOCKS_WARN_ON(in_interrupt()); \
local_irq_save(flags); \
- __raw_spin_lock(&(lock)->raw_lock); \
+ arch_spin_lock(&(lock)->rlock.raw_lock);\
DEBUG_LOCKS_WARN_ON(l->magic != l); \
} while (0)
-#define spin_unlock_mutex(lock, flags) \
- do { \
- __raw_spin_unlock(&(lock)->raw_lock); \
- local_irq_restore(flags); \
- preempt_check_resched(); \
+#define spin_unlock_mutex(lock, flags) \
+ do { \
+ arch_spin_unlock(&(lock)->rlock.raw_lock); \
+ local_irq_restore(flags); \
+ preempt_check_resched(); \
} while (0)
diff --git a/kernel/panic.c b/kernel/panic.c
index 96b45d0b4ba5..5827f7b97254 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -10,6 +10,7 @@
*/
#include <linux/debug_locks.h>
#include <linux/interrupt.h>
+#include <linux/kmsg_dump.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/module.h>
@@ -74,6 +75,7 @@ NORET_TYPE void panic(const char * fmt, ...)
dump_stack();
#endif
+ kmsg_dump(KMSG_DUMP_PANIC);
/*
* If we have crashed and we have a crash kernel loaded let it handle
* everything else.
@@ -339,6 +341,7 @@ void oops_exit(void)
{
do_oops_enter_exit();
print_oops_end_marker();
+ kmsg_dump(KMSG_DUMP_OOPS);
}
#ifdef WANT_WARN_ON_SLOWPATH
diff --git a/kernel/params.c b/kernel/params.c
index d656c276508d..cf1b69183127 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,6 +24,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/ctype.h>
+#include <linux/string.h>
#if 0
#define DEBUGP printk
@@ -122,9 +123,7 @@ static char *next_arg(char *args, char **param, char **val)
next = args + i;
/* Chew up trailing spaces. */
- while (isspace(*next))
- next++;
- return next;
+ return skip_spaces(next);
}
/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
@@ -139,8 +138,7 @@ int parse_args(const char *name,
DEBUGP("Parsing ARGS: %s\n", args);
/* Chew leading spaces */
- while (isspace(*args))
- args++;
+ args = skip_spaces(args);
while (*args) {
int ret;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6b7ddba1dd64..97d1a3dd7a59 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -36,7 +36,7 @@
/*
* Each CPU has a list of per CPU events:
*/
-DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
+static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
int perf_max_events __read_mostly = 1;
static int perf_reserved_percpu __read_mostly;
@@ -203,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
* if so. If we locked the right context, then it
* can't get swapped on us any more.
*/
- spin_lock_irqsave(&ctx->lock, *flags);
+ raw_spin_lock_irqsave(&ctx->lock, *flags);
if (ctx != rcu_dereference(task->perf_event_ctxp)) {
- spin_unlock_irqrestore(&ctx->lock, *flags);
+ raw_spin_unlock_irqrestore(&ctx->lock, *flags);
goto retry;
}
if (!atomic_inc_not_zero(&ctx->refcount)) {
- spin_unlock_irqrestore(&ctx->lock, *flags);
+ raw_spin_unlock_irqrestore(&ctx->lock, *flags);
ctx = NULL;
}
}
@@ -231,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
ctx = perf_lock_task_context(task, &flags);
if (ctx) {
++ctx->pin_count;
- spin_unlock_irqrestore(&ctx->lock, flags);
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
return ctx;
}
@@ -240,9 +240,9 @@ static void perf_unpin_context(struct perf_event_context *ctx)
{
unsigned long flags;
- spin_lock_irqsave(&ctx->lock, flags);
+ raw_spin_lock_irqsave(&ctx->lock, flags);
--ctx->pin_count;
- spin_unlock_irqrestore(&ctx->lock, flags);
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
put_ctx(ctx);
}
@@ -427,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
if (ctx->task && cpuctx->task_ctx != ctx)
return;
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
/*
* Protect the list operation against NMI by disabling the
* events on a global level.
@@ -449,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
}
perf_enable();
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
@@ -476,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event)
if (!task) {
/*
* Per cpu events are removed via an smp call and
- * the removal is always sucessful.
+ * the removal is always successful.
*/
smp_call_function_single(event->cpu,
__perf_event_remove_from_context,
@@ -488,12 +488,12 @@ retry:
task_oncpu_function_call(task, __perf_event_remove_from_context,
event);
- spin_lock_irq(&ctx->lock);
+ raw_spin_lock_irq(&ctx->lock);
/*
* If the context is active we need to retry the smp call.
*/
if (ctx->nr_active && !list_empty(&event->group_entry)) {
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
goto retry;
}
@@ -504,7 +504,7 @@ retry:
*/
if (!list_empty(&event->group_entry))
list_del_event(event, ctx);
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
}
/*
@@ -535,7 +535,7 @@ static void __perf_event_disable(void *info)
if (ctx->task && cpuctx->task_ctx != ctx)
return;
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
/*
* If the event is on, turn it off.
@@ -551,7 +551,7 @@ static void __perf_event_disable(void *info)
event->state = PERF_EVENT_STATE_OFF;
}
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
/*
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info)
* is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context.
*/
-static void perf_event_disable(struct perf_event *event)
+void perf_event_disable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
@@ -584,12 +584,12 @@ static void perf_event_disable(struct perf_event *event)
retry:
task_oncpu_function_call(task, __perf_event_disable, event);
- spin_lock_irq(&ctx->lock);
+ raw_spin_lock_irq(&ctx->lock);
/*
* If the event is still active, we need to retry the cross-call.
*/
if (event->state == PERF_EVENT_STATE_ACTIVE) {
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
goto retry;
}
@@ -602,7 +602,7 @@ static void perf_event_disable(struct perf_event *event)
event->state = PERF_EVENT_STATE_OFF;
}
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
}
static int
@@ -770,7 +770,7 @@ static void __perf_install_in_context(void *info)
cpuctx->task_ctx = ctx;
}
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
ctx->is_active = 1;
update_context_time(ctx);
@@ -782,6 +782,9 @@ static void __perf_install_in_context(void *info)
add_event_to_ctx(event, ctx);
+ if (event->cpu != -1 && event->cpu != smp_processor_id())
+ goto unlock;
+
/*
* Don't put the event on if it is disabled or if
* it is in a group and the group isn't on.
@@ -820,7 +823,7 @@ static void __perf_install_in_context(void *info)
unlock:
perf_enable();
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
/*
@@ -845,7 +848,7 @@ perf_install_in_context(struct perf_event_context *ctx,
if (!task) {
/*
* Per cpu events are installed via an smp call and
- * the install is always sucessful.
+ * the install is always successful.
*/
smp_call_function_single(cpu, __perf_install_in_context,
event, 1);
@@ -856,12 +859,12 @@ retry:
task_oncpu_function_call(task, __perf_install_in_context,
event);
- spin_lock_irq(&ctx->lock);
+ raw_spin_lock_irq(&ctx->lock);
/*
* we need to retry the smp call.
*/
if (ctx->is_active && list_empty(&event->group_entry)) {
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
goto retry;
}
@@ -872,7 +875,7 @@ retry:
*/
if (list_empty(&event->group_entry))
add_event_to_ctx(event, ctx);
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
}
/*
@@ -917,7 +920,7 @@ static void __perf_event_enable(void *info)
cpuctx->task_ctx = ctx;
}
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
ctx->is_active = 1;
update_context_time(ctx);
@@ -925,6 +928,9 @@ static void __perf_event_enable(void *info)
goto unlock;
__perf_event_mark_enabled(event, ctx);
+ if (event->cpu != -1 && event->cpu != smp_processor_id())
+ goto unlock;
+
/*
* If the event is in a group and isn't the group leader,
* then don't put it on unless the group is on.
@@ -959,7 +965,7 @@ static void __perf_event_enable(void *info)
}
unlock:
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
/*
@@ -971,7 +977,7 @@ static void __perf_event_enable(void *info)
* perf_event_for_each_child or perf_event_for_each as described
* for perf_event_disable.
*/
-static void perf_event_enable(struct perf_event *event)
+void perf_event_enable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
@@ -985,7 +991,7 @@ static void perf_event_enable(struct perf_event *event)
return;
}
- spin_lock_irq(&ctx->lock);
+ raw_spin_lock_irq(&ctx->lock);
if (event->state >= PERF_EVENT_STATE_INACTIVE)
goto out;
@@ -1000,10 +1006,10 @@ static void perf_event_enable(struct perf_event *event)
event->state = PERF_EVENT_STATE_OFF;
retry:
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
task_oncpu_function_call(task, __perf_event_enable, event);
- spin_lock_irq(&ctx->lock);
+ raw_spin_lock_irq(&ctx->lock);
/*
* If the context is active and the event is still off,
@@ -1020,7 +1026,7 @@ static void perf_event_enable(struct perf_event *event)
__perf_event_mark_enabled(event, ctx);
out:
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
}
static int perf_event_refresh(struct perf_event *event, int refresh)
@@ -1042,7 +1048,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
{
struct perf_event *event;
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
ctx->is_active = 0;
if (likely(!ctx->nr_events))
goto out;
@@ -1055,7 +1061,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
}
perf_enable();
out:
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
/*
@@ -1193,8 +1199,8 @@ void perf_event_task_sched_out(struct task_struct *task,
* order we take the locks because no other cpu could
* be trying to lock both of these tasks.
*/
- spin_lock(&ctx->lock);
- spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&ctx->lock);
+ raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
/*
* XXX do we need a memory barrier of sorts
@@ -1208,8 +1214,8 @@ void perf_event_task_sched_out(struct task_struct *task,
perf_event_sync_stat(ctx, next_ctx);
}
- spin_unlock(&next_ctx->lock);
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&next_ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
rcu_read_unlock();
@@ -1251,7 +1257,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
struct perf_event *event;
int can_add_hw = 1;
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
ctx->is_active = 1;
if (likely(!ctx->nr_events))
goto out;
@@ -1306,7 +1312,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
}
perf_enable();
out:
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
/*
@@ -1370,7 +1376,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
struct hw_perf_event *hwc;
u64 interrupts, freq;
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
@@ -1425,7 +1431,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
perf_enable();
}
}
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
/*
@@ -1438,7 +1444,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
if (!ctx->nr_events)
return;
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
/*
* Rotate the first entry last (works just fine for group events too):
*/
@@ -1449,7 +1455,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
}
perf_enable();
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
}
void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1498,7 +1504,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
__perf_event_task_sched_out(ctx);
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
list_for_each_entry(event, &ctx->group_list, group_entry) {
if (!event->attr.enable_on_exec)
@@ -1516,7 +1522,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
if (enabled)
unclone_ctx(ctx);
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
perf_event_task_sched_in(task, smp_processor_id());
out:
@@ -1542,10 +1548,10 @@ static void __perf_event_read(void *info)
if (ctx->task && cpuctx->task_ctx != ctx)
return;
- spin_lock(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
update_context_time(ctx);
update_event_times(event);
- spin_unlock(&ctx->lock);
+ raw_spin_unlock(&ctx->lock);
event->pmu->read(event);
}
@@ -1563,10 +1569,10 @@ static u64 perf_event_read(struct perf_event *event)
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->lock, flags);
+ raw_spin_lock_irqsave(&ctx->lock, flags);
update_context_time(ctx);
update_event_times(event);
- spin_unlock_irqrestore(&ctx->lock, flags);
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
return atomic64_read(&event->count);
@@ -1579,8 +1585,7 @@ static void
__perf_event_init_context(struct perf_event_context *ctx,
struct task_struct *task)
{
- memset(ctx, 0, sizeof(*ctx));
- spin_lock_init(&ctx->lock);
+ raw_spin_lock_init(&ctx->lock);
mutex_init(&ctx->mutex);
INIT_LIST_HEAD(&ctx->group_list);
INIT_LIST_HEAD(&ctx->event_list);
@@ -1596,15 +1601,12 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
unsigned long flags;
int err;
- /*
- * If cpu is not a wildcard then this is a percpu event:
- */
- if (cpu != -1) {
+ if (pid == -1 && cpu != -1) {
/* Must be root to operate on a CPU event: */
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
- if (cpu < 0 || cpu > num_possible_cpus())
+ if (cpu < 0 || cpu >= nr_cpumask_bits)
return ERR_PTR(-EINVAL);
/*
@@ -1612,7 +1614,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
* offline CPU and activate it when the CPU comes up, but
* that's for later.
*/
- if (!cpu_isset(cpu, cpu_online_map))
+ if (!cpu_online(cpu))
return ERR_PTR(-ENODEV);
cpuctx = &per_cpu(perf_cpu_context, cpu);
@@ -1650,11 +1652,11 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
ctx = perf_lock_task_context(task, &flags);
if (ctx) {
unclone_ctx(ctx);
- spin_unlock_irqrestore(&ctx->lock, flags);
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
if (!ctx) {
- ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+ ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
err = -ENOMEM;
if (!ctx)
goto errout;
@@ -1988,7 +1990,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
if (!value)
return -EINVAL;
- spin_lock_irq(&ctx->lock);
+ raw_spin_lock_irq(&ctx->lock);
if (event->attr.freq) {
if (value > sysctl_perf_event_sample_rate) {
ret = -EINVAL;
@@ -2001,7 +2003,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
event->hw.sample_period = value;
}
unlock:
- spin_unlock_irq(&ctx->lock);
+ raw_spin_unlock_irq(&ctx->lock);
return ret;
}
@@ -4011,6 +4013,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
event->pmu->read(event);
data.addr = 0;
+ data.raw = NULL;
data.period = event->hw.last_period;
regs = get_irq_regs();
/*
@@ -4080,8 +4083,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
u64 now;
now = cpu_clock(cpu);
- prev = atomic64_read(&event->hw.prev_count);
- atomic64_set(&event->hw.prev_count, now);
+ prev = atomic64_xchg(&event->hw.prev_count, now);
atomic64_add(now - prev, &event->count);
}
@@ -4286,15 +4288,8 @@ static void bp_perf_event_destroy(struct perf_event *event)
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
{
int err;
- /*
- * The breakpoint is already filled if we haven't created the counter
- * through perf syscall
- * FIXME: manage to get trigerred to NULL if it comes from syscalls
- */
- if (!bp->callback)
- err = register_perf_hw_breakpoint(bp);
- else
- err = __register_perf_hw_breakpoint(bp);
+
+ err = register_perf_hw_breakpoint(bp);
if (err)
return ERR_PTR(err);
@@ -4308,6 +4303,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
+ sample.raw = NULL;
sample.addr = bp->attr.bp_addr;
if (!perf_exclude_event(bp, regs))
@@ -4390,7 +4386,7 @@ perf_event_alloc(struct perf_event_attr *attr,
struct perf_event_context *ctx,
struct perf_event *group_leader,
struct perf_event *parent_event,
- perf_callback_t callback,
+ perf_overflow_handler_t overflow_handler,
gfp_t gfpflags)
{
const struct pmu *pmu;
@@ -4433,10 +4429,10 @@ perf_event_alloc(struct perf_event_attr *attr,
event->state = PERF_EVENT_STATE_INACTIVE;
- if (!callback && parent_event)
- callback = parent_event->callback;
+ if (!overflow_handler && parent_event)
+ overflow_handler = parent_event->overflow_handler;
- event->callback = callback;
+ event->overflow_handler = overflow_handler;
if (attr->disabled)
event->state = PERF_EVENT_STATE_OFF;
@@ -4571,7 +4567,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (attr->type >= PERF_TYPE_MAX)
return -EINVAL;
- if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+ if (attr->__reserved_1 || attr->__reserved_2)
return -EINVAL;
if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -4776,7 +4772,8 @@ err_put_context:
*/
struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
- pid_t pid, perf_callback_t callback)
+ pid_t pid,
+ perf_overflow_handler_t overflow_handler)
{
struct perf_event *event;
struct perf_event_context *ctx;
@@ -4793,7 +4790,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
}
event = perf_event_alloc(attr, cpu, ctx, NULL,
- NULL, callback, GFP_KERNEL);
+ NULL, overflow_handler, GFP_KERNEL);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_put_context;
@@ -4998,7 +4995,7 @@ void perf_event_exit_task(struct task_struct *child)
* reading child->perf_event_ctxp, we wait until it has
* incremented the context's refcount before we do put_ctx below.
*/
- spin_lock(&child_ctx->lock);
+ raw_spin_lock(&child_ctx->lock);
child->perf_event_ctxp = NULL;
/*
* If this context is a clone; unclone it so it can't get
@@ -5007,7 +5004,7 @@ void perf_event_exit_task(struct task_struct *child)
*/
unclone_ctx(child_ctx);
update_context_time(child_ctx);
- spin_unlock_irqrestore(&child_ctx->lock, flags);
+ raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
/*
* Report the task dead after unscheduling the events so that we
@@ -5090,7 +5087,7 @@ again:
*/
int perf_event_init_task(struct task_struct *child)
{
- struct perf_event_context *child_ctx, *parent_ctx;
+ struct perf_event_context *child_ctx = NULL, *parent_ctx;
struct perf_event_context *cloned_ctx;
struct perf_event *event;
struct task_struct *parent = current;
@@ -5106,20 +5103,6 @@ int perf_event_init_task(struct task_struct *child)
return 0;
/*
- * This is executed from the parent task context, so inherit
- * events that have been marked for cloning.
- * First allocate and initialize a context for the child.
- */
-
- child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
- if (!child_ctx)
- return -ENOMEM;
-
- __perf_event_init_context(child_ctx, child);
- child->perf_event_ctxp = child_ctx;
- get_task_struct(child);
-
- /*
* If the parent's context is a clone, pin it so it won't get
* swapped under us.
*/
@@ -5149,6 +5132,26 @@ int perf_event_init_task(struct task_struct *child)
continue;
}
+ if (!child->perf_event_ctxp) {
+ /*
+ * This is executed from the parent task context, so
+ * inherit events that have been marked for cloning.
+ * First allocate and initialize a context for the
+ * child.
+ */
+
+ child_ctx = kzalloc(sizeof(struct perf_event_context),
+ GFP_KERNEL);
+ if (!child_ctx) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ __perf_event_init_context(child_ctx, child);
+ child->perf_event_ctxp = child_ctx;
+ get_task_struct(child);
+ }
+
ret = inherit_group(event, parent, parent_ctx,
child, child_ctx);
if (ret) {
@@ -5177,6 +5180,7 @@ int perf_event_init_task(struct task_struct *child)
get_ctx(child_ctx->parent_ctx);
}
+exit:
mutex_unlock(&parent_ctx->mutex);
perf_unpin_context(parent_ctx);
@@ -5291,11 +5295,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
perf_reserved_percpu = val;
for_each_online_cpu(cpu) {
cpuctx = &per_cpu(perf_cpu_context, cpu);
- spin_lock_irq(&cpuctx->ctx.lock);
+ raw_spin_lock_irq(&cpuctx->ctx.lock);
mpt = min(perf_max_events - cpuctx->ctx.nr_events,
perf_max_events - perf_reserved_percpu);
cpuctx->max_pertask = mpt;
- spin_unlock_irq(&cpuctx->ctx.lock);
+ raw_spin_unlock_irq(&cpuctx->ctx.lock);
}
spin_unlock(&perf_resource_lock);
diff --git a/kernel/pid.c b/kernel/pid.c
index d3f722d20f9c..2e17c9c92cbe 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -141,11 +141,12 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
* installing it:
*/
spin_lock_irq(&pidmap_lock);
- if (map->page)
- kfree(page);
- else
+ if (!map->page) {
map->page = page;
+ page = NULL;
+ }
spin_unlock_irq(&pidmap_lock);
+ kfree(page);
if (unlikely(!map->page))
break;
}
@@ -268,12 +269,11 @@ struct pid *alloc_pid(struct pid_namespace *ns)
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
+ upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
- for (i = ns->level; i >= 0; i--) {
- upid = &pid->numbers[i];
+ for ( ; upid >= pid->numbers; --upid)
hlist_add_head_rcu(&upid->pid_chain,
&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
- }
spin_unlock_irq(&pidmap_lock);
out:
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index dfdec524d1b7..3db49b9ca374 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -29,7 +29,6 @@
#include <linux/pm_qos_params.h>
#include <linux/sched.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/time.h>
@@ -344,37 +343,33 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
}
EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
-#define PID_NAME_LEN sizeof("process_1234567890")
-static char name[PID_NAME_LEN];
+#define PID_NAME_LEN 32
static int pm_qos_power_open(struct inode *inode, struct file *filp)
{
int ret;
long pm_qos_class;
+ char name[PID_NAME_LEN];
- lock_kernel();
pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
if (pm_qos_class >= 0) {
filp->private_data = (void *)pm_qos_class;
- sprintf(name, "process_%d", current->pid);
+ snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
ret = pm_qos_add_requirement(pm_qos_class, name,
PM_QOS_DEFAULT_VALUE);
- if (ret >= 0) {
- unlock_kernel();
+ if (ret >= 0)
return 0;
- }
}
- unlock_kernel();
-
return -EPERM;
}
static int pm_qos_power_release(struct inode *inode, struct file *filp)
{
int pm_qos_class;
+ char name[PID_NAME_LEN];
pm_qos_class = (long)filp->private_data;
- sprintf(name, "process_%d", current->pid);
+ snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
pm_qos_remove_requirement(pm_qos_class, name);
return 0;
@@ -385,13 +380,14 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
{
s32 value;
int pm_qos_class;
+ char name[PID_NAME_LEN];
pm_qos_class = (long)filp->private_data;
if (count != sizeof(s32))
return -EINVAL;
if (copy_from_user(&value, buf, sizeof(s32)))
return -EFAULT;
- sprintf(name, "process_%d", current->pid);
+ snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
pm_qos_update_requirement(pm_qos_class, name, value);
return sizeof(s32);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 5c9dc228747b..438ff4523513 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -384,7 +384,8 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
/*
* Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
- * This is called from sys_timer_create with the new timer already locked.
+ * This is called from sys_timer_create() and do_cpu_nanosleep() with the
+ * new timer already all-zeros initialized.
*/
int posix_cpu_timer_create(struct k_itimer *new_timer)
{
@@ -396,8 +397,6 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
return -EINVAL;
INIT_LIST_HEAD(&new_timer->it.cpu.entry);
- new_timer->it.cpu.incr.sched = 0;
- new_timer->it.cpu.expires.sched = 0;
read_lock(&tasklist_lock);
if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index c3b81c30e5d5..43191815f874 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_PM_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
-obj-$(CONFIG_HIBERNATION) += swsusp.o hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o
obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 5187136fe1de..218e5af90156 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -6,7 +6,7 @@
#include <linux/vt_kern.h>
#include <linux/kbd_kern.h>
-#include <linux/console.h>
+#include <linux/vt.h>
#include <linux/module.h>
#include "power.h"
@@ -21,8 +21,7 @@ int pm_prepare_console(void)
if (orig_fgconsole < 0)
return 1;
- orig_kmsg = kmsg_redirect;
- kmsg_redirect = SUSPEND_CONSOLE;
+ orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
return 0;
}
@@ -30,7 +29,7 @@ void pm_restore_console(void)
{
if (orig_fgconsole >= 0) {
vt_move_to_console(orig_fgconsole, 0);
- kmsg_redirect = orig_kmsg;
+ vt_kmsg_redirect(orig_kmsg);
}
}
#endif
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 04a9e90d248f..bbfe472d7524 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -32,6 +32,7 @@ static int noresume = 0;
static char resume_file[256] = CONFIG_PM_STD_PARTITION;
dev_t swsusp_resume_device;
sector_t swsusp_resume_block;
+int in_suspend __nosavedata = 0;
enum {
HIBERNATION_INVALID,
@@ -202,6 +203,35 @@ static void platform_recover(int platform_mode)
}
/**
+ * swsusp_show_speed - print the time elapsed between two events.
+ * @start: Starting event.
+ * @stop: Final event.
+ * @nr_pages: number of pages processed between @start and @stop
+ * @msg: introductory message to print
+ */
+
+void swsusp_show_speed(struct timeval *start, struct timeval *stop,
+ unsigned nr_pages, char *msg)
+{
+ s64 elapsed_centisecs64;
+ int centisecs;
+ int k; /* amount processed, in kbytes */
+ int kps; /* throughput, in kbytes per second */
+
+ elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
+ do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); /* ns -> 1/100 s */
+ centisecs = elapsed_centisecs64;
+ if (centisecs == 0)
+ centisecs = 1; /* avoid div-by-zero */
+ k = nr_pages * (PAGE_SIZE / 1024);
+ kps = (k * 100) / centisecs;
+ printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n",
+ msg, k,
+ centisecs / 100, centisecs % 100,
+ kps / 1000, (kps % 1000) / 10);
+}
+
+/**
* create_image - freeze devices that need to be frozen with interrupts
* off, create the hibernation image and thaw those devices. Control
* reappears in this routine after a restore.
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 347d2cc88cd0..0998c7139053 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -220,6 +220,7 @@ static struct attribute_group attr_group = {
#ifdef CONFIG_PM_RUNTIME
struct workqueue_struct *pm_wq;
+EXPORT_SYMBOL_GPL(pm_wq);
static int __init pm_start_workqueue(void)
{
diff --git a/kernel/power/process.c b/kernel/power/process.c
index cc2e55373b68..5ade1bdcf366 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
+#include <linux/delay.h>
/*
* Timeout for stopping processes
@@ -41,7 +42,7 @@ static int try_to_freeze_tasks(bool sig_only)
do_gettimeofday(&start);
end_time = jiffies + TIMEOUT;
- do {
+ while (true) {
todo = 0;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
@@ -62,10 +63,15 @@ static int try_to_freeze_tasks(bool sig_only)
todo++;
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
- yield(); /* Yield is okay here */
- if (time_after(jiffies, end_time))
+ if (!todo || time_after(jiffies, end_time))
break;
- } while (todo);
+
+ /*
+ * We need to retry, but first give the freezing tasks some
+ * time to enter the refrigerator.
+ */
+ msleep(10);
+ }
do_gettimeofday(&end);
elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 890f6b11b1d3..09b2b0ae9e9d 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -38,6 +38,107 @@ struct swsusp_header {
static struct swsusp_header *swsusp_header;
+/**
+ * The following functions are used for tracing the allocated
+ * swap pages, so that they can be freed in case of an error.
+ */
+
+struct swsusp_extent {
+ struct rb_node node; /* link in the swsusp_extents tree */
+ unsigned long start; /* first swap offset covered (inclusive) */
+ unsigned long end; /* last swap offset covered (inclusive) */
+};
+
+static struct rb_root swsusp_extents = RB_ROOT; /* starts out empty */
+
+static int swsusp_extents_insert(unsigned long swap_offset)
+{
+ struct rb_node **new = &(swsusp_extents.rb_node);
+ struct rb_node *parent = NULL;
+ struct swsusp_extent *ext;
+
+ /* Record swap_offset: descend to the slot where a new node would go */
+ while (*new) {
+ ext = container_of(*new, struct swsusp_extent, node);
+ parent = *new;
+ if (swap_offset < ext->start) {
+ /* Try to merge: offset sits right below this extent */
+ if (swap_offset == ext->start - 1) {
+ ext->start--;
+ return 0; /* two neighbouring extents are not joined */
+ }
+ new = &((*new)->rb_left);
+ } else if (swap_offset > ext->end) {
+ /* Try to merge: offset sits right above this extent */
+ if (swap_offset == ext->end + 1) {
+ ext->end++;
+ return 0; /* two neighbouring extents are not joined */
+ }
+ new = &((*new)->rb_right);
+ } else {
+ /* It already is in the tree */
+ return -EINVAL;
+ }
+ }
+ /* Add the new single-offset node and rebalance the tree. */
+ ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
+ if (!ext)
+ return -ENOMEM;
+
+ ext->start = swap_offset;
+ ext->end = swap_offset;
+ rb_link_node(&ext->node, parent, new);
+ rb_insert_color(&ext->node, &swsusp_extents);
+ return 0;
+}
+
+/**
+ * alloc_swapdev_block - allocate a swap page and register that it has
+ * been allocated, so that it can be freed in case of an error.
+ */
+
+sector_t alloc_swapdev_block(int swap)
+{
+ unsigned long offset;
+
+ offset = swp_offset(get_swap_page_of_type(swap));
+ if (offset) {
+ if (swsusp_extents_insert(offset))
+ swap_free(swp_entry(swap, offset)); /* not recorded: give it back */
+ else
+ return swapdev_block(swap, offset);
+ }
+ return 0; /* offset was 0, or the page could not be recorded */
+}
+
+/**
+ * free_all_swap_pages - free swap pages allocated for saving image data.
+ * It also frees the extents used to register which swap entries had been
+ * allocated.
+ */
+
+void free_all_swap_pages(int swap)
+{
+ struct rb_node *node;
+
+ while ((node = swsusp_extents.rb_node)) { /* take the root until empty */
+ struct swsusp_extent *ext;
+ unsigned long offset;
+
+ ext = container_of(node, struct swsusp_extent, node);
+ rb_erase(node, &swsusp_extents);
+ for (offset = ext->start; offset <= ext->end; offset++)
+ swap_free(swp_entry(swap, offset)); /* every offset in the extent */
+
+ kfree(ext);
+ }
+}
+
+int swsusp_swap_in_use(void)
+{
+ return (swsusp_extents.rb_node != NULL); /* non-zero while any extent is recorded */
+}
+
/*
* General things
*/
@@ -336,7 +437,7 @@ static int save_image(struct swap_map_handle *handle,
if (ret)
break;
if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
+ printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
nr_pages++;
}
err2 = wait_on_bio_chain(&bio);
@@ -344,9 +445,9 @@ static int save_image(struct swap_map_handle *handle,
if (!ret)
ret = err2;
if (!ret)
- printk("\b\b\b\bdone\n");
+ printk(KERN_CONT "\b\b\b\bdone\n");
else
- printk("\n");
+ printk(KERN_CONT "\n");
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
return ret;
}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 6a07f4dbf2f8..5b3601bd1893 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -56,133 +56,3 @@
#include "power.h"
int in_suspend __nosavedata = 0;
-
-/**
- * The following functions are used for tracing the allocated
- * swap pages, so that they can be freed in case of an error.
- */
-
-struct swsusp_extent {
- struct rb_node node;
- unsigned long start;
- unsigned long end;
-};
-
-static struct rb_root swsusp_extents = RB_ROOT;
-
-static int swsusp_extents_insert(unsigned long swap_offset)
-{
- struct rb_node **new = &(swsusp_extents.rb_node);
- struct rb_node *parent = NULL;
- struct swsusp_extent *ext;
-
- /* Figure out where to put the new node */
- while (*new) {
- ext = container_of(*new, struct swsusp_extent, node);
- parent = *new;
- if (swap_offset < ext->start) {
- /* Try to merge */
- if (swap_offset == ext->start - 1) {
- ext->start--;
- return 0;
- }
- new = &((*new)->rb_left);
- } else if (swap_offset > ext->end) {
- /* Try to merge */
- if (swap_offset == ext->end + 1) {
- ext->end++;
- return 0;
- }
- new = &((*new)->rb_right);
- } else {
- /* It already is in the tree */
- return -EINVAL;
- }
- }
- /* Add the new node and rebalance the tree. */
- ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
- if (!ext)
- return -ENOMEM;
-
- ext->start = swap_offset;
- ext->end = swap_offset;
- rb_link_node(&ext->node, parent, new);
- rb_insert_color(&ext->node, &swsusp_extents);
- return 0;
-}
-
-/**
- * alloc_swapdev_block - allocate a swap page and register that it has
- * been allocated, so that it can be freed in case of an error.
- */
-
-sector_t alloc_swapdev_block(int swap)
-{
- unsigned long offset;
-
- offset = swp_offset(get_swap_page_of_type(swap));
- if (offset) {
- if (swsusp_extents_insert(offset))
- swap_free(swp_entry(swap, offset));
- else
- return swapdev_block(swap, offset);
- }
- return 0;
-}
-
-/**
- * free_all_swap_pages - free swap pages allocated for saving image data.
- * It also frees the extents used to register which swap entres had been
- * allocated.
- */
-
-void free_all_swap_pages(int swap)
-{
- struct rb_node *node;
-
- while ((node = swsusp_extents.rb_node)) {
- struct swsusp_extent *ext;
- unsigned long offset;
-
- ext = container_of(node, struct swsusp_extent, node);
- rb_erase(node, &swsusp_extents);
- for (offset = ext->start; offset <= ext->end; offset++)
- swap_free(swp_entry(swap, offset));
-
- kfree(ext);
- }
-}
-
-int swsusp_swap_in_use(void)
-{
- return (swsusp_extents.rb_node != NULL);
-}
-
-/**
- * swsusp_show_speed - print the time elapsed between two events represented by
- * @start and @stop
- *
- * @nr_pages - number of pages processed between @start and @stop
- * @msg - introductory message to print
- */
-
-void swsusp_show_speed(struct timeval *start, struct timeval *stop,
- unsigned nr_pages, char *msg)
-{
- s64 elapsed_centisecs64;
- int centisecs;
- int k;
- int kps;
-
- elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
- do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
- centisecs = elapsed_centisecs64;
- if (centisecs == 0)
- centisecs = 1; /* avoid div-by-zero */
- k = nr_pages * (PAGE_SIZE / 1024);
- kps = (k * 100) / centisecs;
- printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n",
- msg, k,
- centisecs / 100, centisecs % 100,
- kps / 1000, (kps % 1000) / 10);
-}
diff --git a/kernel/printk.c b/kernel/printk.c
index b5ac4d99c667..17463ca2e229 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -34,6 +34,7 @@
#include <linux/syscalls.h>
#include <linux/kexec.h>
#include <linux/ratelimit.h>
+#include <linux/kmsg_dump.h>
#include <asm/uaccess.h>
@@ -1405,4 +1406,122 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
return false;
}
EXPORT_SYMBOL(printk_timed_ratelimit);
+
+static DEFINE_SPINLOCK(dump_list_lock); /* held while dump_list is changed or walked */
+static LIST_HEAD(dump_list); /* registered kmsg dumpers */
+
+/**
+ * kmsg_dump_register - register a kernel log dumper.
+ * @dumper: pointer to the kmsg_dumper structure
+ *
+ * Adds a kernel log dumper to the system. The dump callback in the
+ * structure will be called when the kernel oopses or panics and must be
+ * set. Returns 0, %-EINVAL (no dump callback) or %-EBUSY (already registered).
+ */
+int kmsg_dump_register(struct kmsg_dumper *dumper)
+{
+ unsigned long flags;
+ int err = -EBUSY; /* result when @dumper is already registered */
+
+ /* The dump callback needs to be set */
+ if (!dumper->dump)
+ return -EINVAL;
+
+ spin_lock_irqsave(&dump_list_lock, flags);
+ /* Don't allow registering multiple times */
+ if (!dumper->registered) {
+ dumper->registered = 1;
+ list_add_tail(&dumper->list, &dump_list);
+ err = 0;
+ }
+ spin_unlock_irqrestore(&dump_list_lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_register);
+
+/**
+ * kmsg_dump_unregister - unregister a kmsg dumper.
+ * @dumper: pointer to the kmsg_dumper structure
+ *
+ * Removes a dump device from the system. Returns zero on success and
+ * %-EINVAL if @dumper was not registered.
+ */
+int kmsg_dump_unregister(struct kmsg_dumper *dumper)
+{
+ unsigned long flags;
+ int err = -EINVAL; /* result when @dumper is not on the list */
+
+ spin_lock_irqsave(&dump_list_lock, flags);
+ if (dumper->registered) {
+ dumper->registered = 0;
+ list_del(&dumper->list);
+ err = 0;
+ }
+ spin_unlock_irqrestore(&dump_list_lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
+
+static const char * const kmsg_reasons[] = { /* printable name per dump reason */
+ [KMSG_DUMP_OOPS] = "oops",
+ [KMSG_DUMP_PANIC] = "panic",
+};
+
+static const char *kmsg_to_str(enum kmsg_dump_reason reason) /* reason -> name */
+{
+ if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
+ return "unknown"; /* reason has no slot in kmsg_reasons[] */
+
+ return kmsg_reasons[reason];
+}
+
+/**
+ * kmsg_dump - dump kernel log to kernel message dumpers.
+ * @reason: the reason (oops, panic etc) for dumping
+ *
+ * Iterate through each of the dump devices and call the oops/panic
+ * callbacks with the log buffer, passed as two pieces (s1/l1 then s2/l2).
+ */
+void kmsg_dump(enum kmsg_dump_reason reason)
+{
+ unsigned long end;
+ unsigned chars;
+ struct kmsg_dumper *dumper;
+ const char *s1, *s2;
+ unsigned long l1, l2;
+ unsigned long flags;
+
+ /* Theoretically, the log could move on after we do this, but
+ there's not a lot we can do about that. The new messages
+ will overwrite the start of what we dump. */
+ spin_lock_irqsave(&logbuf_lock, flags);
+ end = log_end & LOG_BUF_MASK;
+ chars = logged_chars; /* snapshot; only this copy is used below */
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+
+ if (chars > end) { /* text wraps: first piece is at the buffer's tail */
+ s1 = log_buf + log_buf_len - chars + end;
+ l1 = chars - end;
+
+ s2 = log_buf;
+ l2 = end;
+ } else { /* no wrap: a single piece ending at 'end' */
+ s1 = "";
+ l1 = 0;
+
+ s2 = log_buf + end - chars;
+ l2 = chars;
+ }
+
+ if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
+ printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n",
+ kmsg_to_str(reason));
+ return;
+ }
+ list_for_each_entry(dumper, &dump_list, list)
+ dumper->dump(dumper, reason, s1, l1, s2, l2);
+ spin_unlock_irqrestore(&dump_list_lock, flags);
+}
#endif
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a621a67ef4e3..9bb52177af02 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -763,13 +763,13 @@ static void rcu_torture_timer(unsigned long unused)
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
- ++__get_cpu_var(rcu_torture_count)[pipe_count];
+ __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
completed = cur_ops->completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
completed = RCU_TORTURE_PIPE_LEN;
}
- ++__get_cpu_var(rcu_torture_batch)[completed];
+ __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
preempt_enable();
cur_ops->readunlock(idx);
}
@@ -818,13 +818,13 @@ rcu_torture_reader(void *arg)
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
- ++__get_cpu_var(rcu_torture_count)[pipe_count];
+ __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
completed = cur_ops->completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
completed = RCU_TORTURE_PIPE_LEN;
}
- ++__get_cpu_var(rcu_torture_batch)[completed];
+ __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
preempt_enable();
cur_ops->readunlock(idx);
schedule();
diff --git a/kernel/relay.c b/kernel/relay.c
index 760c26209a3c..c705a41b4ba3 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1198,7 +1198,7 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
relay_consume_bytes(rbuf, buf->private);
}
-static struct pipe_buf_operations relay_pipe_buf_ops = {
+static const struct pipe_buf_operations relay_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
diff --git a/kernel/resource.c b/kernel/resource.c
index fb11a58b9594..dc15686b7a77 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -308,35 +308,37 @@ static int find_resource(struct resource *root, struct resource *new,
void *alignf_data)
{
struct resource *this = root->child;
+ resource_size_t start, end;
- new->start = root->start;
+ start = root->start;
/*
* Skip past an allocated resource that starts at 0, since the assignment
* of this->start - 1 to new->end below would cause an underflow.
*/
if (this && this->start == 0) {
- new->start = this->end + 1;
+ start = this->end + 1;
this = this->sibling;
}
for(;;) {
if (this)
- new->end = this->start - 1;
+ end = this->start - 1;
else
- new->end = root->end;
- if (new->start < min)
- new->start = min;
- if (new->end > max)
- new->end = max;
- new->start = ALIGN(new->start, align);
+ end = root->end;
+ if (start < min)
+ start = min;
+ if (end > max)
+ end = max;
+ start = ALIGN(start, align);
if (alignf)
alignf(alignf_data, new, size, align);
- if (new->start < new->end && new->end - new->start >= size - 1) {
- new->end = new->start + size - 1;
+ if (start < end && end - start >= size - 1) {
+ new->start = start;
+ new->end = start + size - 1;
return 0;
}
if (!this)
break;
- new->start = this->end + 1;
+ start = this->end + 1;
this = this->sibling;
}
return -EBUSY;
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 5fcb4fe645e2..ddabb54bb5c8 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -37,8 +37,8 @@ do { \
if (rt_trace_on) { \
rt_trace_on = 0; \
console_verbose(); \
- if (spin_is_locked(&current->pi_lock)) \
- spin_unlock(&current->pi_lock); \
+ if (raw_spin_is_locked(&current->pi_lock)) \
+ raw_spin_unlock(&current->pi_lock); \
} \
} while (0)
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 29bd4baf9e75..a9604815786a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -138,9 +138,9 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
{
unsigned long flags;
- spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
__rt_mutex_adjust_prio(task);
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
/*
@@ -195,7 +195,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/*
* Task can not go away as we did a get_task() before !
*/
- spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
/*
@@ -231,8 +231,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
goto out_unlock_pi;
lock = waiter->lock;
- if (!spin_trylock(&lock->wait_lock)) {
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ if (!raw_spin_trylock(&lock->wait_lock)) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
cpu_relax();
goto retry;
}
@@ -240,7 +240,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* Deadlock detection */
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
ret = deadlock_detect ? -EDEADLK : 0;
goto out_unlock_pi;
}
@@ -253,13 +253,13 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
plist_add(&waiter->list_entry, &lock->wait_list);
/* Release the task */
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
put_task_struct(task);
/* Grab the next task */
task = rt_mutex_owner(lock);
get_task_struct(task);
- spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
if (waiter == rt_mutex_top_waiter(lock)) {
/* Boost the owner */
@@ -277,10 +277,10 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
__rt_mutex_adjust_prio(task);
}
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
top_waiter = rt_mutex_top_waiter(lock);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
if (!detect_deadlock && waiter != top_waiter)
goto out_put_task;
@@ -288,7 +288,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
goto again;
out_unlock_pi:
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
out_put_task:
put_task_struct(task);
@@ -313,9 +313,9 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
if (pendowner == task)
return 1;
- spin_lock_irqsave(&pendowner->pi_lock, flags);
+ raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
if (task->prio >= pendowner->prio) {
- spin_unlock_irqrestore(&pendowner->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
return 0;
}
@@ -325,7 +325,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
* priority.
*/
if (likely(!rt_mutex_has_waiters(lock))) {
- spin_unlock_irqrestore(&pendowner->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
return 1;
}
@@ -333,7 +333,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
next = rt_mutex_top_waiter(lock);
plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
__rt_mutex_adjust_prio(pendowner);
- spin_unlock_irqrestore(&pendowner->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
/*
* We are going to steal the lock and a waiter was
@@ -350,10 +350,10 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
* might be task:
*/
if (likely(next->task != task)) {
- spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
plist_add(&next->pi_list_entry, &task->pi_waiters);
__rt_mutex_adjust_prio(task);
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
return 1;
}
@@ -420,7 +420,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
unsigned long flags;
int chain_walk = 0, res;
- spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
__rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
@@ -434,17 +434,17 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
task->pi_blocked_on = waiter;
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
if (waiter == rt_mutex_top_waiter(lock)) {
- spin_lock_irqsave(&owner->pi_lock, flags);
+ raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
__rt_mutex_adjust_prio(owner);
if (owner->pi_blocked_on)
chain_walk = 1;
- spin_unlock_irqrestore(&owner->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
}
else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
chain_walk = 1;
@@ -459,12 +459,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
*/
get_task_struct(owner);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
task);
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
return res;
}
@@ -483,7 +483,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
struct task_struct *pendowner;
unsigned long flags;
- spin_lock_irqsave(&current->pi_lock, flags);
+ raw_spin_lock_irqsave(&current->pi_lock, flags);
waiter = rt_mutex_top_waiter(lock);
plist_del(&waiter->list_entry, &lock->wait_list);
@@ -500,7 +500,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
- spin_unlock_irqrestore(&current->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags);
/*
* Clear the pi_blocked_on variable and enqueue a possible
@@ -509,7 +509,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
* waiter with higher priority than pending-owner->normal_prio
* is blocked on the unboosted (pending) owner.
*/
- spin_lock_irqsave(&pendowner->pi_lock, flags);
+ raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
WARN_ON(!pendowner->pi_blocked_on);
WARN_ON(pendowner->pi_blocked_on != waiter);
@@ -523,7 +523,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
next = rt_mutex_top_waiter(lock);
plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
}
- spin_unlock_irqrestore(&pendowner->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
wake_up_process(pendowner);
}
@@ -541,15 +541,15 @@ static void remove_waiter(struct rt_mutex *lock,
unsigned long flags;
int chain_walk = 0;
- spin_lock_irqsave(&current->pi_lock, flags);
+ raw_spin_lock_irqsave(&current->pi_lock, flags);
plist_del(&waiter->list_entry, &lock->wait_list);
waiter->task = NULL;
current->pi_blocked_on = NULL;
- spin_unlock_irqrestore(&current->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags);
if (first && owner != current) {
- spin_lock_irqsave(&owner->pi_lock, flags);
+ raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
@@ -564,7 +564,7 @@ static void remove_waiter(struct rt_mutex *lock,
if (owner->pi_blocked_on)
chain_walk = 1;
- spin_unlock_irqrestore(&owner->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
}
WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
@@ -575,11 +575,11 @@ static void remove_waiter(struct rt_mutex *lock,
/* gets dropped in rt_mutex_adjust_prio_chain()! */
get_task_struct(owner);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
}
/*
@@ -592,15 +592,15 @@ void rt_mutex_adjust_pi(struct task_struct *task)
struct rt_mutex_waiter *waiter;
unsigned long flags;
- spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
if (!waiter || waiter->list_entry.prio == task->prio) {
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
- spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
/* gets dropped in rt_mutex_adjust_prio_chain()! */
get_task_struct(task);
@@ -672,14 +672,14 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
break;
}
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
if (waiter->task)
schedule_rt_mutex(lock);
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
set_current_state(state);
}
@@ -700,11 +700,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
debug_rt_mutex_init_waiter(&waiter);
waiter.task = NULL;
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
/* Try to acquire the lock again: */
if (try_to_take_rt_mutex(lock)) {
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
return 0;
}
@@ -731,7 +731,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
*/
fixup_rt_mutex_waiters(lock);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
/* Remove pending timer: */
if (unlikely(timeout))
@@ -758,7 +758,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
{
int ret = 0;
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
if (likely(rt_mutex_owner(lock) != current)) {
@@ -770,7 +770,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
fixup_rt_mutex_waiters(lock);
}
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
return ret;
}
@@ -781,7 +781,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
static void __sched
rt_mutex_slowunlock(struct rt_mutex *lock)
{
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
debug_rt_mutex_unlock(lock);
@@ -789,13 +789,13 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
if (!rt_mutex_has_waiters(lock)) {
lock->owner = NULL;
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
return;
}
wakeup_next_waiter(lock);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
/* Undo pi boosting if necessary: */
rt_mutex_adjust_prio(current);
@@ -970,8 +970,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
lock->owner = NULL;
- spin_lock_init(&lock->wait_lock);
- plist_head_init(&lock->wait_list, &lock->wait_lock);
+ raw_spin_lock_init(&lock->wait_lock);
+ plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
debug_rt_mutex_init(lock, name);
}
@@ -1032,7 +1032,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
{
int ret;
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
mark_rt_mutex_waiters(lock);
@@ -1040,7 +1040,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
/* We got the lock for task. */
debug_rt_mutex_lock(lock);
rt_mutex_set_owner(lock, task, 0);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
@@ -1056,7 +1056,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
*/
ret = 0;
}
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
@@ -1106,7 +1106,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
{
int ret;
- spin_lock(&lock->wait_lock);
+ raw_spin_lock(&lock->wait_lock);
set_current_state(TASK_INTERRUPTIBLE);
@@ -1124,7 +1124,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
*/
fixup_rt_mutex_waiters(lock);
- spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&lock->wait_lock);
/*
* Readjust priority, when we did not get the lock. We might have been
diff --git a/kernel/sched.c b/kernel/sched.c
index aa31244caa9f..18cceeecce35 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -141,7 +141,7 @@ struct rt_prio_array {
struct rt_bandwidth {
/* nests inside the rq lock: */
- spinlock_t rt_runtime_lock;
+ raw_spinlock_t rt_runtime_lock;
ktime_t rt_period;
u64 rt_runtime;
struct hrtimer rt_period_timer;
@@ -178,7 +178,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
rt_b->rt_period = ns_to_ktime(period);
rt_b->rt_runtime = runtime;
- spin_lock_init(&rt_b->rt_runtime_lock);
+ raw_spin_lock_init(&rt_b->rt_runtime_lock);
hrtimer_init(&rt_b->rt_period_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -200,7 +200,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
if (hrtimer_active(&rt_b->rt_period_timer))
return;
- spin_lock(&rt_b->rt_runtime_lock);
+ raw_spin_lock(&rt_b->rt_runtime_lock);
for (;;) {
unsigned long delta;
ktime_t soft, hard;
@@ -217,7 +217,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
HRTIMER_MODE_ABS_PINNED, 0);
}
- spin_unlock(&rt_b->rt_runtime_lock);
+ raw_spin_unlock(&rt_b->rt_runtime_lock);
}
#ifdef CONFIG_RT_GROUP_SCHED
@@ -298,7 +298,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
#ifdef CONFIG_RT_GROUP_SCHED
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var);
#endif /* CONFIG_RT_GROUP_SCHED */
#else /* !CONFIG_USER_SCHED */
#define root_task_group init_task_group
@@ -470,7 +470,7 @@ struct rt_rq {
u64 rt_time;
u64 rt_runtime;
/* Nests inside the rq lock: */
- spinlock_t rt_runtime_lock;
+ raw_spinlock_t rt_runtime_lock;
#ifdef CONFIG_RT_GROUP_SCHED
unsigned long rt_nr_boosted;
@@ -525,7 +525,7 @@ static struct root_domain def_root_domain;
*/
struct rq {
/* runqueue lock: */
- spinlock_t lock;
+ raw_spinlock_t lock;
/*
* nr_running and cpu_load should be in the same cacheline because
@@ -685,7 +685,7 @@ inline void update_rq_clock(struct rq *rq)
*/
int runqueue_is_locked(int cpu)
{
- return spin_is_locked(&cpu_rq(cpu)->lock);
+ return raw_spin_is_locked(&cpu_rq(cpu)->lock);
}
/*
@@ -814,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
* default: 0.25ms
*/
unsigned int sysctl_sched_shares_ratelimit = 250000;
+unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
/*
* Inject some fuzzyness into changing the per-cpu group shares
@@ -892,7 +893,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
*/
spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
}
#else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -916,9 +917,9 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
next->oncpu = 1;
#endif
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
#else
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
#endif
}
@@ -948,10 +949,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
{
for (;;) {
struct rq *rq = task_rq(p);
- spin_lock(&rq->lock);
+ raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p)))
return rq;
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
}
}
@@ -968,10 +969,10 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
for (;;) {
local_irq_save(*flags);
rq = task_rq(p);
- spin_lock(&rq->lock);
+ raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p)))
return rq;
- spin_unlock_irqrestore(&rq->lock, *flags);
+ raw_spin_unlock_irqrestore(&rq->lock, *flags);
}
}
@@ -980,19 +981,19 @@ void task_rq_unlock_wait(struct task_struct *p)
struct rq *rq = task_rq(p);
smp_mb(); /* spin-unlock-wait is not a full memory barrier */
- spin_unlock_wait(&rq->lock);
+ raw_spin_unlock_wait(&rq->lock);
}
static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
}
static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
__releases(rq->lock)
{
- spin_unlock_irqrestore(&rq->lock, *flags);
+ raw_spin_unlock_irqrestore(&rq->lock, *flags);
}
/*
@@ -1005,7 +1006,7 @@ static struct rq *this_rq_lock(void)
local_irq_disable();
rq = this_rq();
- spin_lock(&rq->lock);
+ raw_spin_lock(&rq->lock);
return rq;
}
@@ -1052,10 +1053,10 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
- spin_lock(&rq->lock);
+ raw_spin_lock(&rq->lock);
update_rq_clock(rq);
rq->curr->sched_class->task_tick(rq, rq->curr, 1);
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
return HRTIMER_NORESTART;
}
@@ -1068,10 +1069,10 @@ static void __hrtick_start(void *arg)
{
struct rq *rq = arg;
- spin_lock(&rq->lock);
+ raw_spin_lock(&rq->lock);
hrtimer_restart(&rq->hrtick_timer);
rq->hrtick_csd_pending = 0;
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
}
/*
@@ -1178,7 +1179,7 @@ static void resched_task(struct task_struct *p)
{
int cpu;
- assert_spin_locked(&task_rq(p)->lock);
+ assert_raw_spin_locked(&task_rq(p)->lock);
if (test_tsk_need_resched(p))
return;
@@ -1200,10 +1201,10 @@ static void resched_cpu(int cpu)
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
- if (!spin_trylock_irqsave(&rq->lock, flags))
+ if (!raw_spin_trylock_irqsave(&rq->lock, flags))
return;
resched_task(cpu_curr(cpu));
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
#ifdef CONFIG_NO_HZ
@@ -1272,7 +1273,7 @@ static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
#else /* !CONFIG_SMP */
static void resched_task(struct task_struct *p)
{
- assert_spin_locked(&task_rq(p)->lock);
+ assert_raw_spin_locked(&task_rq(p)->lock);
set_tsk_need_resched(p);
}
@@ -1599,11 +1600,11 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
__set_se_shares(tg->se[cpu], shares);
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
}
@@ -1614,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
*/
static int tg_shares_up(struct task_group *tg, void *data)
{
- unsigned long weight, rq_weight = 0, shares = 0;
+ unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
unsigned long *usd_rq_weight;
struct sched_domain *sd = data;
unsigned long flags;
@@ -1630,6 +1631,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
weight = tg->cfs_rq[i]->load.weight;
usd_rq_weight[i] = weight;
+ rq_weight += weight;
/*
* If there are currently no tasks on the cpu pretend there
* is one of average load so that when a new task gets to
@@ -1638,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data)
if (!weight)
weight = NICE_0_LOAD;
- rq_weight += weight;
+ sum_weight += weight;
shares += tg->cfs_rq[i]->shares;
}
+ if (!rq_weight)
+ rq_weight = sum_weight;
+
if ((!shares && rq_weight) || shares > tg->shares)
shares = tg->shares;
@@ -1701,9 +1706,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
if (root_task_group_empty())
return;
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
update_shares(sd);
- spin_lock(&rq->lock);
+ raw_spin_lock(&rq->lock);
}
static void update_h_load(long cpu)
@@ -1743,7 +1748,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
- spin_unlock(&this_rq->lock);
+ raw_spin_unlock(&this_rq->lock);
double_rq_lock(this_rq, busiest);
return 1;
@@ -1764,14 +1769,16 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
{
int ret = 0;
- if (unlikely(!spin_trylock(&busiest->lock))) {
+ if (unlikely(!raw_spin_trylock(&busiest->lock))) {
if (busiest < this_rq) {
- spin_unlock(&this_rq->lock);
- spin_lock(&busiest->lock);
- spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_unlock(&this_rq->lock);
+ raw_spin_lock(&busiest->lock);
+ raw_spin_lock_nested(&this_rq->lock,
+ SINGLE_DEPTH_NESTING);
ret = 1;
} else
- spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock_nested(&busiest->lock,
+ SINGLE_DEPTH_NESTING);
}
return ret;
}
@@ -1785,7 +1792,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
{
if (unlikely(!irqs_disabled())) {
/* printk() doesn't work good under rq->lock */
- spin_unlock(&this_rq->lock);
+ raw_spin_unlock(&this_rq->lock);
BUG_ON(1);
}
@@ -1795,7 +1802,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
__releases(busiest->lock)
{
- spin_unlock(&busiest->lock);
+ raw_spin_unlock(&busiest->lock);
lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
}
#endif
@@ -1810,6 +1817,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
#endif
static void calc_load_account_active(struct rq *this_rq);
+static void update_sysctl(void);
+static int get_update_sysctl_factor(void);
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+ set_task_rq(p, cpu);
+#ifdef CONFIG_SMP
+ /*
+ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+ * successfuly executed on another CPU. We must ensure that updates of
+ * per-task data have been completed by this moment.
+ */
+ smp_wmb();
+ task_thread_info(p)->cpu = cpu;
+#endif
+}
#include "sched_stats.h"
#include "sched_idletask.c"
@@ -1967,20 +1990,6 @@ inline int task_curr(const struct task_struct *p)
return cpu_curr(task_cpu(p)) == p;
}
-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
- set_task_rq(p, cpu);
-#ifdef CONFIG_SMP
- /*
- * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
- * successfuly executed on another CPU. We must ensure that updates of
- * per-task data have been completed by this moment.
- */
- smp_wmb();
- task_thread_info(p)->cpu = cpu;
-#endif
-}
-
static inline void check_class_changed(struct rq *rq, struct task_struct *p,
const struct sched_class *prev_class,
int oldprio, int running)
@@ -2016,13 +2025,13 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
return;
}
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
set_task_cpu(p, cpu);
p->cpus_allowed = cpumask_of_cpu(cpu);
p->rt.nr_cpus_allowed = 1;
p->flags |= PF_THREAD_BOUND;
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
EXPORT_SYMBOL(kthread_bind);
@@ -2060,29 +2069,13 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
int old_cpu = task_cpu(p);
- struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
struct cfs_rq *old_cfsrq = task_cfs_rq(p),
*new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
- u64 clock_offset;
-
- clock_offset = old_rq->clock - new_rq->clock;
trace_sched_migrate_task(p, new_cpu);
-#ifdef CONFIG_SCHEDSTATS
- if (p->se.wait_start)
- p->se.wait_start -= clock_offset;
- if (p->se.sleep_start)
- p->se.sleep_start -= clock_offset;
- if (p->se.block_start)
- p->se.block_start -= clock_offset;
-#endif
if (old_cpu != new_cpu) {
p->se.nr_migrations++;
-#ifdef CONFIG_SCHEDSTATS
- if (task_hot(p, old_rq->clock, NULL))
- schedstat_inc(p, se.nr_forced2_migrations);
-#endif
perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
1, 1, NULL, 0);
}
@@ -2323,6 +2316,14 @@ void task_oncpu_function_call(struct task_struct *p,
preempt_enable();
}
+#ifdef CONFIG_SMP
+static inline
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+{
+ return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+}
+#endif
+
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
@@ -2374,17 +2375,14 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
p->state = TASK_WAKING;
- task_rq_unlock(rq, &flags);
+ __task_rq_unlock(rq);
- cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
- if (cpu != orig_cpu) {
- local_irq_save(flags);
- rq = cpu_rq(cpu);
- update_rq_clock(rq);
+ cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+ if (cpu != orig_cpu)
set_task_cpu(p, cpu);
- local_irq_restore(flags);
- }
- rq = task_rq_lock(p, &flags);
+
+ rq = __task_rq_lock(p);
+ update_rq_clock(rq);
WARN_ON(p->state != TASK_WAKING);
cpu = task_cpu(p);
@@ -2499,7 +2497,6 @@ static void __sched_fork(struct task_struct *p)
p->se.avg_overlap = 0;
p->se.start_runtime = 0;
p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
- p->se.avg_running = 0;
#ifdef CONFIG_SCHEDSTATS
p->se.wait_start = 0;
@@ -2521,7 +2518,6 @@ static void __sched_fork(struct task_struct *p)
p->se.nr_failed_migrations_running = 0;
p->se.nr_failed_migrations_hot = 0;
p->se.nr_forced_migrations = 0;
- p->se.nr_forced2_migrations = 0;
p->se.nr_wakeups = 0;
p->se.nr_wakeups_sync = 0;
@@ -2558,7 +2554,6 @@ static void __sched_fork(struct task_struct *p)
void sched_fork(struct task_struct *p, int clone_flags)
{
int cpu = get_cpu();
- unsigned long flags;
__sched_fork(p);
@@ -2592,13 +2587,13 @@ void sched_fork(struct task_struct *p, int clone_flags)
if (!rt_prio(p->prio))
p->sched_class = &fair_sched_class;
+ if (p->sched_class->task_fork)
+ p->sched_class->task_fork(p);
+
#ifdef CONFIG_SMP
- cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
+ cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
#endif
- local_irq_save(flags);
- update_rq_clock(cpu_rq(cpu));
set_task_cpu(p, cpu);
- local_irq_restore(flags);
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
if (likely(sched_info_on()))
@@ -2631,17 +2626,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
rq = task_rq_lock(p, &flags);
BUG_ON(p->state != TASK_RUNNING);
update_rq_clock(rq);
-
- if (!p->sched_class->task_new || !current->se.on_rq) {
- activate_task(rq, p, 0);
- } else {
- /*
- * Let the scheduling class do new task startup
- * management (if any):
- */
- p->sched_class->task_new(rq, p);
- inc_nr_running(rq);
- }
+ activate_task(rq, p, 0);
trace_sched_wakeup_new(rq, p, 1);
check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
@@ -2798,10 +2783,10 @@ static inline void post_schedule(struct rq *rq)
if (rq->post_schedule) {
unsigned long flags;
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->curr->sched_class->post_schedule)
rq->curr->sched_class->post_schedule(rq);
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
rq->post_schedule = 0;
}
@@ -3083,15 +3068,15 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
{
BUG_ON(!irqs_disabled());
if (rq1 == rq2) {
- spin_lock(&rq1->lock);
+ raw_spin_lock(&rq1->lock);
__acquire(rq2->lock); /* Fake it out ;) */
} else {
if (rq1 < rq2) {
- spin_lock(&rq1->lock);
- spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&rq1->lock);
+ raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
} else {
- spin_lock(&rq2->lock);
- spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&rq2->lock);
+ raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
}
}
update_rq_clock(rq1);
@@ -3108,9 +3093,9 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__releases(rq1->lock)
__releases(rq2->lock)
{
- spin_unlock(&rq1->lock);
+ raw_spin_unlock(&rq1->lock);
if (rq1 != rq2)
- spin_unlock(&rq2->lock);
+ raw_spin_unlock(&rq2->lock);
else
__release(rq2->lock);
}
@@ -3156,7 +3141,7 @@ out:
void sched_exec(void)
{
int new_cpu, this_cpu = get_cpu();
- new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
+ new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
put_cpu();
if (new_cpu != this_cpu)
sched_migrate_task(current, new_cpu);
@@ -3172,10 +3157,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
activate_task(this_rq, p, 0);
- /*
- * Note that idle threads have a prio of MAX_PRIO, for this test
- * to be always true for them.
- */
check_preempt_curr(this_rq, p, 0);
}
@@ -4134,7 +4115,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
unsigned long flags;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
- cpumask_copy(cpus, cpu_online_mask);
+ cpumask_copy(cpus, cpu_active_mask);
/*
* When power savings policy is enabled for the parent domain, idle
@@ -4207,14 +4188,15 @@ redo:
if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
- spin_lock_irqsave(&busiest->lock, flags);
+ raw_spin_lock_irqsave(&busiest->lock, flags);
/* don't kick the migration_thread, if the curr
* task on busiest cpu can't be moved to this_cpu
*/
if (!cpumask_test_cpu(this_cpu,
&busiest->curr->cpus_allowed)) {
- spin_unlock_irqrestore(&busiest->lock, flags);
+ raw_spin_unlock_irqrestore(&busiest->lock,
+ flags);
all_pinned = 1;
goto out_one_pinned;
}
@@ -4224,7 +4206,7 @@ redo:
busiest->push_cpu = this_cpu;
active_balance = 1;
}
- spin_unlock_irqrestore(&busiest->lock, flags);
+ raw_spin_unlock_irqrestore(&busiest->lock, flags);
if (active_balance)
wake_up_process(busiest->migration_thread);
@@ -4297,7 +4279,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
int all_pinned = 0;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
- cpumask_copy(cpus, cpu_online_mask);
+ cpumask_copy(cpus, cpu_active_mask);
/*
* When power savings policy is enabled for the parent domain, idle
@@ -4406,10 +4388,10 @@ redo:
/*
* Should not call ttwu while holding a rq->lock
*/
- spin_unlock(&this_rq->lock);
+ raw_spin_unlock(&this_rq->lock);
if (active_balance)
wake_up_process(busiest->migration_thread);
- spin_lock(&this_rq->lock);
+ raw_spin_lock(&this_rq->lock);
} else
sd->nr_balance_failed = 0;
@@ -4694,7 +4676,7 @@ int select_nohz_load_balancer(int stop_tick)
cpumask_set_cpu(cpu, nohz.cpu_mask);
/* time for ilb owner also to sleep */
- if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
+ if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
if (atomic_read(&nohz.load_balancer) == cpu)
atomic_set(&nohz.load_balancer, -1);
return 0;
@@ -5278,11 +5260,11 @@ void scheduler_tick(void)
sched_clock_tick();
- spin_lock(&rq->lock);
+ raw_spin_lock(&rq->lock);
update_rq_clock(rq);
update_cpu_load(rq);
curr->sched_class->task_tick(rq, curr, 0);
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
perf_event_task_tick(curr, cpu);
@@ -5396,13 +5378,14 @@ static inline void schedule_debug(struct task_struct *prev)
#endif
}
-static void put_prev_task(struct rq *rq, struct task_struct *p)
+static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
- u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
+ if (prev->state == TASK_RUNNING) {
+ u64 runtime = prev->se.sum_exec_runtime;
- update_avg(&p->se.avg_running, runtime);
+ runtime -= prev->se.prev_sum_exec_runtime;
+ runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
- if (p->state == TASK_RUNNING) {
/*
* In order to avoid avg_overlap growing stale when we are
* indeed overlapping and hence not getting put to sleep, grow
@@ -5412,12 +5395,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p)
* correlates to the amount of cache footprint a task can
* build up.
*/
- runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
- update_avg(&p->se.avg_overlap, runtime);
- } else {
- update_avg(&p->se.avg_running, 0);
+ update_avg(&prev->se.avg_overlap, runtime);
}
- p->sched_class->put_prev_task(rq, p);
+ prev->sched_class->put_prev_task(rq, prev);
}
/*
@@ -5478,7 +5458,7 @@ need_resched_nonpreemptible:
if (sched_feat(HRTICK))
hrtick_clear(rq);
- spin_lock_irq(&rq->lock);
+ raw_spin_lock_irq(&rq->lock);
update_rq_clock(rq);
clear_tsk_need_resched(prev);
@@ -5514,7 +5494,7 @@ need_resched_nonpreemptible:
cpu = smp_processor_id();
rq = cpu_rq(cpu);
} else
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
post_schedule(rq);
@@ -6343,7 +6323,7 @@ recheck:
* make sure no PI-waiters arrive (or leave) while we are
* changing the priority of the task:
*/
- spin_lock_irqsave(&p->pi_lock, flags);
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
/*
* To be able to change p->policy safely, the apropriate
* runqueue lock must be held.
@@ -6353,7 +6333,7 @@ recheck:
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1;
__task_rq_unlock(rq);
- spin_unlock_irqrestore(&p->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
goto recheck;
}
update_rq_clock(rq);
@@ -6377,7 +6357,7 @@ recheck:
check_class_changed(rq, p, prev_class, oldprio, running);
}
__task_rq_unlock(rq);
- spin_unlock_irqrestore(&p->pi_lock, flags);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
rt_mutex_adjust_pi(p);
@@ -6631,6 +6611,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
long sched_getaffinity(pid_t pid, struct cpumask *mask)
{
struct task_struct *p;
+ unsigned long flags;
+ struct rq *rq;
int retval;
get_online_cpus();
@@ -6645,7 +6627,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
if (retval)
goto out_unlock;
+ rq = task_rq_lock(p, &flags);
cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
+ task_rq_unlock(rq, &flags);
out_unlock:
read_unlock(&tasklist_lock);
@@ -6703,7 +6687,7 @@ SYSCALL_DEFINE0(sched_yield)
*/
__release(rq->lock);
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
- _raw_spin_unlock(&rq->lock);
+ do_raw_spin_unlock(&rq->lock);
preempt_enable_no_resched();
schedule();
@@ -6883,6 +6867,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
{
struct task_struct *p;
unsigned int time_slice;
+ unsigned long flags;
+ struct rq *rq;
int retval;
struct timespec t;
@@ -6899,7 +6885,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
if (retval)
goto out_unlock;
- time_slice = p->sched_class->get_rr_interval(p);
+ rq = task_rq_lock(p, &flags);
+ time_slice = p->sched_class->get_rr_interval(rq, p);
+ task_rq_unlock(rq, &flags);
read_unlock(&tasklist_lock);
jiffies_to_timespec(time_slice, &t);
@@ -6995,12 +6983,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
__sched_fork(idle);
idle->se.exec_start = sched_clock();
- idle->prio = idle->normal_prio = MAX_PRIO;
cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
__set_task_cpu(idle, cpu);
@@ -7008,7 +6995,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
idle->oncpu = 1;
#endif
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
/* Set the preempt count _outside_ the spinlocks! */
#if defined(CONFIG_PREEMPT)
@@ -7041,22 +7028,43 @@ cpumask_var_t nohz_cpu_mask;
*
* This idea comes from the SD scheduler of Con Kolivas:
*/
-static inline void sched_init_granularity(void)
+static int get_update_sysctl_factor(void)
{
- unsigned int factor = 1 + ilog2(num_online_cpus());
- const unsigned long limit = 200000000;
+ unsigned int cpus = min_t(int, num_online_cpus(), 8);
+ unsigned int factor;
+
+ switch (sysctl_sched_tunable_scaling) {
+ case SCHED_TUNABLESCALING_NONE:
+ factor = 1;
+ break;
+ case SCHED_TUNABLESCALING_LINEAR:
+ factor = cpus;
+ break;
+ case SCHED_TUNABLESCALING_LOG:
+ default:
+ factor = 1 + ilog2(cpus);
+ break;
+ }
- sysctl_sched_min_granularity *= factor;
- if (sysctl_sched_min_granularity > limit)
- sysctl_sched_min_granularity = limit;
+ return factor;
+}
- sysctl_sched_latency *= factor;
- if (sysctl_sched_latency > limit)
- sysctl_sched_latency = limit;
+static void update_sysctl(void)
+{
+ unsigned int factor = get_update_sysctl_factor();
- sysctl_sched_wakeup_granularity *= factor;
+#define SET_SYSCTL(name) \
+ (sysctl_##name = (factor) * normalized_sysctl_##name)
+ SET_SYSCTL(sched_min_granularity);
+ SET_SYSCTL(sched_latency);
+ SET_SYSCTL(sched_wakeup_granularity);
+ SET_SYSCTL(sched_shares_ratelimit);
+#undef SET_SYSCTL
+}
- sysctl_sched_shares_ratelimit *= factor;
+static inline void sched_init_granularity(void)
+{
+ update_sysctl();
}
#ifdef CONFIG_SMP
@@ -7093,7 +7101,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
int ret = 0;
rq = task_rq_lock(p, &flags);
- if (!cpumask_intersects(new_mask, cpu_online_mask)) {
+ if (!cpumask_intersects(new_mask, cpu_active_mask)) {
ret = -EINVAL;
goto out;
}
@@ -7115,7 +7123,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
- if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
+ if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
/* Need help from migration thread: drop lock and wait. */
struct task_struct *mt = rq->migration_thread;
@@ -7204,10 +7212,10 @@ static int migration_thread(void *data)
struct migration_req *req;
struct list_head *head;
- spin_lock_irq(&rq->lock);
+ raw_spin_lock_irq(&rq->lock);
if (cpu_is_offline(cpu)) {
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
break;
}
@@ -7219,7 +7227,7 @@ static int migration_thread(void *data)
head = &rq->migration_queue;
if (list_empty(head)) {
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
schedule();
set_current_state(TASK_INTERRUPTIBLE);
continue;
@@ -7228,14 +7236,14 @@ static int migration_thread(void *data)
list_del_init(head->next);
if (req->task != NULL) {
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
__migrate_task(req->task, cpu, req->dest_cpu);
} else if (likely(cpu == (badcpu = smp_processor_id()))) {
req->dest_cpu = RCU_MIGRATION_GOT_QS;
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
} else {
req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
}
local_irq_enable();
@@ -7269,19 +7277,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
again:
/* Look for allowed, online CPU in same node. */
- for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
goto move;
/* Any allowed, online CPU? */
- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+ dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
if (dest_cpu < nr_cpu_ids)
goto move;
/* No more Mr. Nice Guy. */
if (dest_cpu >= nr_cpu_ids) {
cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
- dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
+ dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
/*
* Don't tell them about moving exiting tasks or
@@ -7310,7 +7318,7 @@ move:
*/
static void migrate_nr_uninterruptible(struct rq *rq_src)
{
- struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
+ struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
unsigned long flags;
local_irq_save(flags);
@@ -7358,14 +7366,14 @@ void sched_idle_next(void)
* Strictly not necessary since rest of the CPUs are stopped by now
* and interrupts disabled on the current cpu.
*/
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
__setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
update_rq_clock(rq);
activate_task(rq, p, 0);
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
/*
@@ -7401,9 +7409,9 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
* that's OK. No task can be added to this CPU, so iteration is
* fine.
*/
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
move_task_off_dead_cpu(dead_cpu, p);
- spin_lock_irq(&rq->lock);
+ raw_spin_lock_irq(&rq->lock);
put_task_struct(p);
}
@@ -7444,17 +7452,16 @@ static struct ctl_table sd_ctl_dir[] = {
.procname = "sched_domain",
.mode = 0555,
},
- {0, },
+ {}
};
static struct ctl_table sd_ctl_root[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = sd_ctl_dir,
},
- {0, },
+ {}
};
static struct ctl_table *sd_alloc_ctl_entry(int n)
@@ -7564,7 +7571,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
static struct ctl_table_header *sd_sysctl_header;
static void register_sched_domain_sysctl(void)
{
- int i, cpu_num = num_online_cpus();
+ int i, cpu_num = num_possible_cpus();
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
char buf[32];
@@ -7574,7 +7581,7 @@ static void register_sched_domain_sysctl(void)
if (entry == NULL)
return;
- for_each_online_cpu(i) {
+ for_each_possible_cpu(i) {
snprintf(buf, 32, "cpu%d", i);
entry->procname = kstrdup(buf, GFP_KERNEL);
entry->mode = 0555;
@@ -7670,13 +7677,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
/* Update our root-domain */
rq = cpu_rq(cpu);
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_online(rq);
}
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
break;
#ifdef CONFIG_HOTPLUG_CPU
@@ -7701,14 +7708,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
put_task_struct(rq->migration_thread);
rq->migration_thread = NULL;
/* Idle task back to normal (off runqueue, low prio) */
- spin_lock_irq(&rq->lock);
+ raw_spin_lock_irq(&rq->lock);
update_rq_clock(rq);
deactivate_task(rq, rq->idle, 0);
- rq->idle->static_prio = MAX_PRIO;
__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
rq->idle->sched_class = &idle_sched_class;
migrate_dead_tasks(cpu);
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
cpuset_unlock();
migrate_nr_uninterruptible(rq);
BUG_ON(rq->nr_running != 0);
@@ -7718,30 +7724,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
* they didn't take sched_hotcpu_mutex. Just wake up
* the requestors.
*/
- spin_lock_irq(&rq->lock);
+ raw_spin_lock_irq(&rq->lock);
while (!list_empty(&rq->migration_queue)) {
struct migration_req *req;
req = list_entry(rq->migration_queue.next,
struct migration_req, list);
list_del_init(&req->list);
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
complete(&req->done);
- spin_lock_irq(&rq->lock);
+ raw_spin_lock_irq(&rq->lock);
}
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
/* Update our root-domain */
rq = cpu_rq(cpu);
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
break;
#endif
}
@@ -7971,7 +7977,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
struct root_domain *old_rd = NULL;
unsigned long flags;
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
old_rd = rq->rd;
@@ -7997,7 +8003,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
set_rq_online(rq);
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
if (old_rd)
free_rootdomain(old_rd);
@@ -8283,14 +8289,14 @@ enum s_alloc {
*/
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
+static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
static int
cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
struct sched_group **sg, struct cpumask *unused)
{
if (sg)
- *sg = &per_cpu(sched_group_cpus, cpu).sg;
+ *sg = &per_cpu(sched_groups, cpu).sg;
return cpu;
}
#endif /* CONFIG_SCHED_SMT */
@@ -9100,7 +9106,7 @@ match1:
if (doms_new == NULL) {
ndoms_cur = 0;
doms_new = &fallback_doms;
- cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map);
+ cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
WARN_ON_ONCE(dattr_new);
}
@@ -9231,8 +9237,10 @@ static int update_sched_domains(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
partition_sched_domains(1, NULL, NULL);
return NOTIFY_OK;
@@ -9279,7 +9287,7 @@ void __init sched_init_smp(void)
#endif
get_online_cpus();
mutex_lock(&sched_domains_mutex);
- arch_init_sched_domains(cpu_online_mask);
+ arch_init_sched_domains(cpu_active_mask);
cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
if (cpumask_empty(non_isolated_cpus))
cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -9352,13 +9360,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
#ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
rt_rq->overloaded = 0;
- plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
+ plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock);
#endif
rt_rq->rt_time = 0;
rt_rq->rt_throttled = 0;
rt_rq->rt_runtime = 0;
- spin_lock_init(&rt_rq->rt_runtime_lock);
+ raw_spin_lock_init(&rt_rq->rt_runtime_lock);
#ifdef CONFIG_RT_GROUP_SCHED
rt_rq->rt_nr_boosted = 0;
@@ -9518,7 +9526,7 @@ void __init sched_init(void)
struct rq *rq;
rq = cpu_rq(i);
- spin_lock_init(&rq->lock);
+ raw_spin_lock_init(&rq->lock);
rq->nr_running = 0;
rq->calc_load_active = 0;
rq->calc_load_update = jiffies + LOAD_FREQ;
@@ -9578,7 +9586,7 @@ void __init sched_init(void)
#elif defined CONFIG_USER_SCHED
init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
init_tg_rt_entry(&init_task_group,
- &per_cpu(init_rt_rq, i),
+ &per_cpu(init_rt_rq_var, i),
&per_cpu(init_sched_rt_entity, i), i, 1,
root_task_group.rt_se[i]);
#endif
@@ -9616,7 +9624,7 @@ void __init sched_init(void)
#endif
#ifdef CONFIG_RT_MUTEXES
- plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+ plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
#endif
/*
@@ -9741,13 +9749,13 @@ void normalize_rt_tasks(void)
continue;
}
- spin_lock(&p->pi_lock);
+ raw_spin_lock(&p->pi_lock);
rq = __task_rq_lock(p);
normalize_task(rq, p);
__task_rq_unlock(rq);
- spin_unlock(&p->pi_lock);
+ raw_spin_unlock(&p->pi_lock);
} while_each_thread(g, p);
read_unlock_irqrestore(&tasklist_lock, flags);
@@ -9843,13 +9851,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
se = kzalloc_node(sizeof(struct sched_entity),
GFP_KERNEL, cpu_to_node(i));
if (!se)
- goto err;
+ goto err_free_rq;
init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
}
return 1;
+ err_free_rq:
+ kfree(cfs_rq);
err:
return 0;
}
@@ -9931,13 +9941,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
GFP_KERNEL, cpu_to_node(i));
if (!rt_se)
- goto err;
+ goto err_free_rq;
init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
}
return 1;
+ err_free_rq:
+ kfree(rt_rq);
err:
return 0;
}
@@ -10106,9 +10118,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
struct rq *rq = cfs_rq->rq;
unsigned long flags;
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
__set_se_shares(se, shares);
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
static DEFINE_MUTEX(shares_mutex);
@@ -10293,18 +10305,18 @@ static int tg_set_bandwidth(struct task_group *tg,
if (err)
goto unlock;
- spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
+ raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
tg->rt_bandwidth.rt_runtime = rt_runtime;
for_each_possible_cpu(i) {
struct rt_rq *rt_rq = tg->rt_rq[i];
- spin_lock(&rt_rq->rt_runtime_lock);
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_runtime = rt_runtime;
- spin_unlock(&rt_rq->rt_runtime_lock);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
}
- spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
+ raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
unlock:
read_unlock(&tasklist_lock);
mutex_unlock(&rt_constraints_mutex);
@@ -10409,15 +10421,15 @@ static int sched_rt_global_constraints(void)
if (sysctl_sched_rt_runtime == 0)
return -EBUSY;
- spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
+ raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
for_each_possible_cpu(i) {
struct rt_rq *rt_rq = &cpu_rq(i)->rt;
- spin_lock(&rt_rq->rt_runtime_lock);
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_runtime = global_rt_runtime();
- spin_unlock(&rt_rq->rt_runtime_lock);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
}
- spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
+ raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
return 0;
}
@@ -10708,9 +10720,9 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
/*
* Take rq->lock to make 64-bit read safe on 32-bit platforms.
*/
- spin_lock_irq(&cpu_rq(cpu)->lock);
+ raw_spin_lock_irq(&cpu_rq(cpu)->lock);
data = *cpuusage;
- spin_unlock_irq(&cpu_rq(cpu)->lock);
+ raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
data = *cpuusage;
#endif
@@ -10726,9 +10738,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
/*
* Take rq->lock to make 64-bit write safe on 32-bit platforms.
*/
- spin_lock_irq(&cpu_rq(cpu)->lock);
+ raw_spin_lock_irq(&cpu_rq(cpu)->lock);
*cpuusage = val;
- spin_unlock_irq(&cpu_rq(cpu)->lock);
+ raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
*cpuusage = val;
#endif
@@ -10962,9 +10974,9 @@ void synchronize_sched_expedited(void)
init_completion(&req->done);
req->task = NULL;
req->dest_cpu = RCU_MIGRATION_NEED_QS;
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
list_add(&req->list, &rq->migration_queue);
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
wake_up_process(rq->migration_thread);
}
for_each_online_cpu(cpu) {
@@ -10972,11 +10984,11 @@ void synchronize_sched_expedited(void)
req = &per_cpu(rcu_migration_req, cpu);
rq = cpu_rq(cpu);
wait_for_completion(&req->done);
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
need_full_sync = 1;
req->dest_cpu = RCU_MIGRATION_IDLE;
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
synchronize_sched_expedited_count++;
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 0f052fc674d5..597b33099dfa 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -135,26 +135,26 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
if (likely(newpri != CPUPRI_INVALID)) {
struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
- spin_lock_irqsave(&vec->lock, flags);
+ raw_spin_lock_irqsave(&vec->lock, flags);
cpumask_set_cpu(cpu, vec->mask);
vec->count++;
if (vec->count == 1)
set_bit(newpri, cp->pri_active);
- spin_unlock_irqrestore(&vec->lock, flags);
+ raw_spin_unlock_irqrestore(&vec->lock, flags);
}
if (likely(oldpri != CPUPRI_INVALID)) {
struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
- spin_lock_irqsave(&vec->lock, flags);
+ raw_spin_lock_irqsave(&vec->lock, flags);
vec->count--;
if (!vec->count)
clear_bit(oldpri, cp->pri_active);
cpumask_clear_cpu(cpu, vec->mask);
- spin_unlock_irqrestore(&vec->lock, flags);
+ raw_spin_unlock_irqrestore(&vec->lock, flags);
}
*currpri = newpri;
@@ -180,7 +180,7 @@ int cpupri_init(struct cpupri *cp, bool bootmem)
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
struct cpupri_vec *vec = &cp->pri_to_cpu[i];
- spin_lock_init(&vec->lock);
+ raw_spin_lock_init(&vec->lock);
vec->count = 0;
if (!zalloc_cpumask_var(&vec->mask, gfp))
goto cleanup;
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index 9a7e859b8fbf..7cb5bb6b95be 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -12,7 +12,7 @@
/* values 2-101 are RT priorities 0-99 */
struct cpupri_vec {
- spinlock_t lock;
+ raw_spinlock_t lock;
int count;
cpumask_var_t mask;
};
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6988cf08f705..67f95aada4b9 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -184,7 +184,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
SPLIT_NS(cfs_rq->exec_clock));
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
if (cfs_rq->rb_leftmost)
MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime;
last = __pick_last_entity(cfs_rq);
@@ -192,7 +192,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
max_vruntime = last->vruntime;
min_vruntime = cfs_rq->min_vruntime;
rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
SPLIT_NS(MIN_vruntime));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
@@ -309,6 +309,12 @@ static void print_cpu(struct seq_file *m, int cpu)
print_rq(m, rq, cpu);
}
+/*
+ * Human-readable names for the tunable scaling modes. sched_debug_show()
+ * indexes this table directly with sysctl_sched_tunable_scaling, so the
+ * entries must stay in the same order as the enum values
+ * (presumably NONE, LOG, LINEAR - confirm against enum sched_tunable_scaling).
+ */
+static const char *sched_tunable_scaling_names[] = {
+ "none",
+ "logarithmic",
+ "linear"
+};
+
static int sched_debug_show(struct seq_file *m, void *v)
{
u64 now = ktime_to_ns(ktime_get());
@@ -334,6 +340,10 @@ static int sched_debug_show(struct seq_file *m, void *v)
#undef PN
#undef P
+ SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
+ sysctl_sched_tunable_scaling,
+ sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
+
for_each_online_cpu(cpu)
print_cpu(m, cpu);
@@ -399,7 +409,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
PN(se.sum_exec_runtime);
PN(se.avg_overlap);
PN(se.avg_wakeup);
- PN(se.avg_running);
nr_switches = p->nvcsw + p->nivcsw;
@@ -423,7 +432,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
P(se.nr_failed_migrations_running);
P(se.nr_failed_migrations_hot);
P(se.nr_forced_migrations);
- P(se.nr_forced2_migrations);
P(se.nr_wakeups);
P(se.nr_wakeups_sync);
P(se.nr_wakeups_migrate);
@@ -499,7 +507,6 @@ void proc_sched_set_task(struct task_struct *p)
p->se.nr_failed_migrations_running = 0;
p->se.nr_failed_migrations_hot = 0;
p->se.nr_forced_migrations = 0;
- p->se.nr_forced2_migrations = 0;
p->se.nr_wakeups = 0;
p->se.nr_wakeups_sync = 0;
p->se.nr_wakeups_migrate = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f61837ad336d..5bedf6e3ebf3 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -21,6 +21,7 @@
*/
#include <linux/latencytop.h>
+#include <linux/sched.h>
/*
* Targeted preemption latency for CPU-bound tasks:
@@ -35,12 +36,26 @@
* run vmstat and monitor the context-switches (cs) field)
*/
unsigned int sysctl_sched_latency = 5000000ULL;
+unsigned int normalized_sysctl_sched_latency = 5000000ULL;
+
+/*
+ * The initial- and re-scaling of tunables is configurable
+ * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)))
+ *
+ * Options are:
+ * SCHED_TUNABLESCALING_NONE - unscaled, always *1
+ * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *(1+ilog(ncpus))
+ * SCHED_TUNABLESCALING_LINEAR - scaled linearly, *ncpus
+ */
+enum sched_tunable_scaling sysctl_sched_tunable_scaling
+ = SCHED_TUNABLESCALING_LOG;
/*
* Minimal preemption granularity for CPU-bound tasks:
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
unsigned int sysctl_sched_min_granularity = 1000000ULL;
+unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL;
/*
* is kept at sysctl_sched_latency / sysctl_sched_min_granularity
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
* have immediate wakeup/sleep latencies.
*/
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
+unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
*/
#ifdef CONFIG_SCHED_DEBUG
-int sched_nr_latency_handler(struct ctl_table *table, int write,
+int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ int factor = get_update_sysctl_factor();
if (ret || !write)
return ret;
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
sysctl_sched_min_granularity);
+#define WRT_SYSCTL(name) \
+ (normalized_sysctl_##name = sysctl_##name / (factor))
+ WRT_SYSCTL(sched_min_granularity);
+ WRT_SYSCTL(sched_latency);
+ WRT_SYSCTL(sched_wakeup_granularity);
+ WRT_SYSCTL(sched_shares_ratelimit);
+#undef WRT_SYSCTL
+
return 0;
}
#endif
@@ -1403,7 +1428,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
new_cpu = prev_cpu;
}
- rcu_read_lock();
for_each_domain(cpu, tmp) {
/*
* If power savings logic is enabled for a domain, see if we
@@ -1484,10 +1508,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
update_shares(tmp);
}
- if (affine_sd && wake_affine(affine_sd, p, sync)) {
- new_cpu = cpu;
- goto out;
- }
+ if (affine_sd && wake_affine(affine_sd, p, sync))
+ return cpu;
while (sd) {
int load_idx = sd->forkexec_idx;
@@ -1528,8 +1550,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
/* while loop will break here if sd == NULL */
}
-out:
- rcu_read_unlock();
return new_cpu;
}
#endif /* CONFIG_SMP */
@@ -1651,12 +1671,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
int sync = wake_flags & WF_SYNC;
int scale = cfs_rq->nr_running >= sched_nr_latency;
- update_curr(cfs_rq);
-
- if (unlikely(rt_prio(p->prio))) {
- resched_task(curr);
- return;
- }
+ if (unlikely(rt_prio(p->prio)))
+ goto preempt;
if (unlikely(p->sched_class != &fair_sched_class))
return;
@@ -1682,50 +1698,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
/* Idle tasks are by definition preempted by everybody. */
- if (unlikely(curr->policy == SCHED_IDLE)) {
- resched_task(curr);
- return;
- }
+ if (unlikely(curr->policy == SCHED_IDLE))
+ goto preempt;
- if ((sched_feat(WAKEUP_SYNC) && sync) ||
- (sched_feat(WAKEUP_OVERLAP) &&
- (se->avg_overlap < sysctl_sched_migration_cost &&
- pse->avg_overlap < sysctl_sched_migration_cost))) {
- resched_task(curr);
- return;
- }
+ if (sched_feat(WAKEUP_SYNC) && sync)
+ goto preempt;
- if (sched_feat(WAKEUP_RUNNING)) {
- if (pse->avg_running < se->avg_running) {
- set_next_buddy(pse);
- resched_task(curr);
- return;
- }
- }
+ if (sched_feat(WAKEUP_OVERLAP) &&
+ se->avg_overlap < sysctl_sched_migration_cost &&
+ pse->avg_overlap < sysctl_sched_migration_cost)
+ goto preempt;
if (!sched_feat(WAKEUP_PREEMPT))
return;
+ update_curr(cfs_rq);
find_matching_se(&se, &pse);
-
BUG_ON(!pse);
+ if (wakeup_preempt_entity(se, pse) == 1)
+ goto preempt;
- if (wakeup_preempt_entity(se, pse) == 1) {
- resched_task(curr);
- /*
- * Only set the backward buddy when the current task is still
- * on the rq. This can happen when a wakeup gets interleaved
- * with schedule on the ->pre_schedule() or idle_balance()
- * point, either of which can * drop the rq lock.
- *
- * Also, during early boot the idle thread is in the fair class,
- * for obvious reasons its a bad idea to schedule back to it.
- */
- if (unlikely(!se->on_rq || curr == rq->idle))
- return;
- if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
- set_last_buddy(se);
- }
+ return;
+
+preempt:
+ resched_task(curr);
+ /*
+ * Only set the backward buddy when the current task is still
+ * on the rq. This can happen when a wakeup gets interleaved
+ * with schedule on the ->pre_schedule() or idle_balance()
+ * point, either of which can drop the rq lock.
+ *
+ * Also, during early boot the idle thread is in the fair class,
+ * for obvious reasons it's a bad idea to schedule back to it.
+ */
+ if (unlikely(!se->on_rq || curr == rq->idle))
+ return;
+
+ if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
+ set_last_buddy(se);
}
static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1905,6 +1915,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
return 0;
}
+
+static void rq_online_fair(struct rq *rq)
+{
+ update_sysctl();
+}
+
+static void rq_offline_fair(struct rq *rq)
+{
+ update_sysctl();
+}
+
#endif /* CONFIG_SMP */
/*
@@ -1922,28 +1943,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
}
/*
- * Share the fairness runtime between parent and child, thus the
- * total amount of pressure for CPU stays equal - new tasks
- * get a chance to run but frequent forkers are not allowed to
- * monopolize the CPU. Note: the parent runqueue is locked,
- * the child is not running yet.
+ * called on fork with the child task as argument from the parent's context
+ * - child not yet on the tasklist
+ * - preemption disabled
*/
-static void task_new_fair(struct rq *rq, struct task_struct *p)
+static void task_fork_fair(struct task_struct *p)
{
- struct cfs_rq *cfs_rq = task_cfs_rq(p);
+ struct cfs_rq *cfs_rq = task_cfs_rq(current);
struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
int this_cpu = smp_processor_id();
+ struct rq *rq = this_rq();
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
- sched_info_queued(p);
+ if (unlikely(task_cpu(p) != this_cpu))
+ __set_task_cpu(p, this_cpu);
update_curr(cfs_rq);
+
if (curr)
se->vruntime = curr->vruntime;
place_entity(cfs_rq, se, 1);
- /* 'curr' will be NULL if the child belongs to a different group */
- if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
- curr && entity_before(curr, se)) {
+ if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
/*
* Upon rescheduling, sched_class::put_prev_task() will place
* 'current' within the tree based on its new key value.
@@ -1952,7 +1975,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
resched_task(rq->curr);
}
- enqueue_task_fair(rq, p, 0);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
/*
@@ -2014,21 +2037,17 @@ static void moved_group_fair(struct task_struct *p)
}
#endif
-unsigned int get_rr_interval_fair(struct task_struct *task)
+unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
{
struct sched_entity *se = &task->se;
- unsigned long flags;
- struct rq *rq;
unsigned int rr_interval = 0;
/*
* Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
* idle runqueue:
*/
- rq = task_rq_lock(task, &flags);
if (rq->cfs.load.weight)
rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
- task_rq_unlock(rq, &flags);
return rr_interval;
}
@@ -2052,11 +2071,13 @@ static const struct sched_class fair_sched_class = {
.load_balance = load_balance_fair,
.move_one_task = move_one_task_fair,
+ .rq_online = rq_online_fair,
+ .rq_offline = rq_offline_fair,
#endif
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair,
- .task_new = task_new_fair,
+ .task_fork = task_fork_fair,
.prio_changed = prio_changed_fair,
.switched_to = switched_to_fair,
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 0d94083582c7..d5059fd761d9 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -54,11 +54,6 @@ SCHED_FEAT(WAKEUP_SYNC, 0)
SCHED_FEAT(WAKEUP_OVERLAP, 0)
/*
- * Wakeup preemption towards tasks that run short
- */
-SCHED_FEAT(WAKEUP_RUNNING, 0)
-
-/*
* Use the SYNC wakeup hint, pipes and the likes use this to indicate
* the remote end is likely to consume the data we just wrote, and
* therefore has cache benefit from being placed on the same cpu, see
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index b133a28fcde3..5f93b570d383 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -34,10 +34,10 @@ static struct task_struct *pick_next_task_idle(struct rq *rq)
static void
dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
{
- spin_unlock_irq(&rq->lock);
+ raw_spin_unlock_irq(&rq->lock);
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
dump_stack();
- spin_lock_irq(&rq->lock);
+ raw_spin_lock_irq(&rq->lock);
}
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
check_preempt_curr(rq, p, 0);
}
-unsigned int get_rr_interval_idle(struct task_struct *task)
+unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
{
return 0;
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 5c5fef378415..d2ea2828164e 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -327,7 +327,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
weight = cpumask_weight(rd->span);
- spin_lock(&rt_b->rt_runtime_lock);
+ raw_spin_lock(&rt_b->rt_runtime_lock);
rt_period = ktime_to_ns(rt_b->rt_period);
for_each_cpu(i, rd->span) {
struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
@@ -336,7 +336,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
if (iter == rt_rq)
continue;
- spin_lock(&iter->rt_runtime_lock);
+ raw_spin_lock(&iter->rt_runtime_lock);
/*
* Either all rqs have inf runtime and there's nothing to steal
* or __disable_runtime() below sets a specific rq to inf to
@@ -358,14 +358,14 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
rt_rq->rt_runtime += diff;
more = 1;
if (rt_rq->rt_runtime == rt_period) {
- spin_unlock(&iter->rt_runtime_lock);
+ raw_spin_unlock(&iter->rt_runtime_lock);
break;
}
}
next:
- spin_unlock(&iter->rt_runtime_lock);
+ raw_spin_unlock(&iter->rt_runtime_lock);
}
- spin_unlock(&rt_b->rt_runtime_lock);
+ raw_spin_unlock(&rt_b->rt_runtime_lock);
return more;
}
@@ -386,8 +386,8 @@ static void __disable_runtime(struct rq *rq)
s64 want;
int i;
- spin_lock(&rt_b->rt_runtime_lock);
- spin_lock(&rt_rq->rt_runtime_lock);
+ raw_spin_lock(&rt_b->rt_runtime_lock);
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
/*
* Either we're all inf and nobody needs to borrow, or we're
* already disabled and thus have nothing to do, or we have
@@ -396,7 +396,7 @@ static void __disable_runtime(struct rq *rq)
if (rt_rq->rt_runtime == RUNTIME_INF ||
rt_rq->rt_runtime == rt_b->rt_runtime)
goto balanced;
- spin_unlock(&rt_rq->rt_runtime_lock);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
/*
* Calculate the difference between what we started out with
@@ -418,7 +418,7 @@ static void __disable_runtime(struct rq *rq)
if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
continue;
- spin_lock(&iter->rt_runtime_lock);
+ raw_spin_lock(&iter->rt_runtime_lock);
if (want > 0) {
diff = min_t(s64, iter->rt_runtime, want);
iter->rt_runtime -= diff;
@@ -427,13 +427,13 @@ static void __disable_runtime(struct rq *rq)
iter->rt_runtime -= want;
want -= want;
}
- spin_unlock(&iter->rt_runtime_lock);
+ raw_spin_unlock(&iter->rt_runtime_lock);
if (!want)
break;
}
- spin_lock(&rt_rq->rt_runtime_lock);
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
/*
* We cannot be left wanting - that would mean some runtime
* leaked out of the system.
@@ -445,8 +445,8 @@ balanced:
* runtime - in which case borrowing doesn't make sense.
*/
rt_rq->rt_runtime = RUNTIME_INF;
- spin_unlock(&rt_rq->rt_runtime_lock);
- spin_unlock(&rt_b->rt_runtime_lock);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
+ raw_spin_unlock(&rt_b->rt_runtime_lock);
}
}
@@ -454,9 +454,9 @@ static void disable_runtime(struct rq *rq)
{
unsigned long flags;
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
__disable_runtime(rq);
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
static void __enable_runtime(struct rq *rq)
@@ -472,13 +472,13 @@ static void __enable_runtime(struct rq *rq)
for_each_leaf_rt_rq(rt_rq, rq) {
struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
- spin_lock(&rt_b->rt_runtime_lock);
- spin_lock(&rt_rq->rt_runtime_lock);
+ raw_spin_lock(&rt_b->rt_runtime_lock);
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_runtime = rt_b->rt_runtime;
rt_rq->rt_time = 0;
rt_rq->rt_throttled = 0;
- spin_unlock(&rt_rq->rt_runtime_lock);
- spin_unlock(&rt_b->rt_runtime_lock);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
+ raw_spin_unlock(&rt_b->rt_runtime_lock);
}
}
@@ -486,9 +486,9 @@ static void enable_runtime(struct rq *rq)
{
unsigned long flags;
- spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq->lock, flags);
__enable_runtime(rq);
- spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
}
static int balance_runtime(struct rt_rq *rt_rq)
@@ -496,9 +496,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
int more = 0;
if (rt_rq->rt_time > rt_rq->rt_runtime) {
- spin_unlock(&rt_rq->rt_runtime_lock);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
more = do_balance_runtime(rt_rq);
- spin_lock(&rt_rq->rt_runtime_lock);
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
}
return more;
@@ -524,11 +524,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
struct rq *rq = rq_of_rt_rq(rt_rq);
- spin_lock(&rq->lock);
+ raw_spin_lock(&rq->lock);
if (rt_rq->rt_time) {
u64 runtime;
- spin_lock(&rt_rq->rt_runtime_lock);
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
if (rt_rq->rt_throttled)
balance_runtime(rt_rq);
runtime = rt_rq->rt_runtime;
@@ -539,13 +539,13 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
}
if (rt_rq->rt_time || rt_rq->rt_nr_running)
idle = 0;
- spin_unlock(&rt_rq->rt_runtime_lock);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
} else if (rt_rq->rt_nr_running)
idle = 0;
if (enqueue)
sched_rt_rq_enqueue(rt_rq);
- spin_unlock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
}
return idle;
@@ -624,11 +624,11 @@ static void update_curr_rt(struct rq *rq)
rt_rq = rt_rq_of_se(rt_se);
if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
- spin_lock(&rt_rq->rt_runtime_lock);
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_time += delta_exec;
if (sched_rt_runtime_exceeded(rt_rq))
resched_task(curr);
- spin_unlock(&rt_rq->rt_runtime_lock);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
}
}
}
@@ -1246,7 +1246,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
task_running(rq, task) ||
!task->se.on_rq)) {
- spin_unlock(&lowest_rq->lock);
+ raw_spin_unlock(&lowest_rq->lock);
lowest_rq = NULL;
break;
}
@@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq)
dequeue_pushable_task(rq, p);
}
-unsigned int get_rr_interval_rt(struct task_struct *task)
+unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
/*
* Time slice is 0 for SCHED_FIFO tasks
diff --git a/kernel/signal.c b/kernel/signal.c
index f67545f9394c..d09692b40376 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -423,7 +423,7 @@ still_pending:
*/
info->si_signo = sig;
info->si_errno = 0;
- info->si_code = 0;
+ info->si_code = SI_USER;
info->si_pid = 0;
info->si_uid = 0;
}
@@ -607,6 +607,17 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s)
return 1;
}
+static inline int is_si_special(const struct siginfo *info)
+{
+ return info <= SEND_SIG_FORCED;
+}
+
+static inline bool si_fromuser(const struct siginfo *info)
+{
+ return info == SEND_SIG_NOINFO ||
+ (!is_si_special(info) && SI_FROMUSER(info));
+}
+
/*
* Bad permissions for sending the signal
* - the caller must hold at least the RCU read lock
@@ -621,7 +632,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
if (!valid_signal(sig))
return -EINVAL;
- if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
+ if (!si_fromuser(info))
return 0;
error = audit_signal_info(sig, t); /* Let audit system see the signal */
@@ -949,9 +960,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
int from_ancestor_ns = 0;
#ifdef CONFIG_PID_NS
- if (!is_si_special(info) && SI_FROMUSER(info) &&
- task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0)
- from_ancestor_ns = 1;
+ from_ancestor_ns = si_fromuser(info) &&
+ !task_pid_nr_ns(current, task_active_pid_ns(t));
#endif
return __send_signal(sig, info, t, group, from_ancestor_ns);
@@ -1052,12 +1062,6 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
return ret;
}
-void
-force_sig_specific(int sig, struct task_struct *t)
-{
- force_sig_info(sig, SEND_SIG_FORCED, t);
-}
-
/*
* Nuke all other threads in the group.
*/
@@ -1187,8 +1191,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
goto out_unlock;
}
pcred = __task_cred(p);
- if ((info == SEND_SIG_NOINFO ||
- (!is_si_special(info) && SI_FROMUSER(info))) &&
+ if (si_fromuser(info) &&
euid != pcred->suid && euid != pcred->uid &&
uid != pcred->suid && uid != pcred->uid) {
ret = -EPERM;
@@ -1840,11 +1843,6 @@ relock:
for (;;) {
struct k_sigaction *ka;
-
- if (unlikely(signal->group_stop_count > 0) &&
- do_signal_stop(0))
- goto relock;
-
/*
* Tracing can induce an artifical signal and choose sigaction.
* The return value in @signr determines the default action,
@@ -1856,6 +1854,10 @@ relock:
if (unlikely(signr != 0))
ka = return_ka;
else {
+ if (unlikely(signal->group_stop_count > 0) &&
+ do_signal_stop(0))
+ goto relock;
+
signr = dequeue_signal(current, &current->blocked,
info);
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 00889bd3c590..7494bbf5a270 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -49,7 +49,6 @@ static const int slow_work_max_vslow = 99;
ctl_table slow_work_sysctls[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "min-threads",
.data = &slow_work_min_threads,
.maxlen = sizeof(unsigned),
@@ -59,7 +58,6 @@ ctl_table slow_work_sysctls[] = {
.extra2 = &slow_work_max_threads,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "max-threads",
.data = &slow_work_max_threads,
.maxlen = sizeof(unsigned),
@@ -69,16 +67,15 @@ ctl_table slow_work_sysctls[] = {
.extra2 = (void *) &slow_work_max_max_threads,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "vslow-percentage",
.data = &vslow_work_proportion,
.maxlen = sizeof(unsigned),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = (void *) &slow_work_min_vslow,
.extra2 = (void *) &slow_work_max_vslow,
},
- { .ctl_name = 0 }
+ {}
};
#endif
diff --git a/kernel/smp.c b/kernel/smp.c
index a8c76069cf50..de735a6637d0 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -16,11 +16,11 @@ static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
static struct {
struct list_head queue;
- spinlock_t lock;
+ raw_spinlock_t lock;
} call_function __cacheline_aligned_in_smp =
{
.queue = LIST_HEAD_INIT(call_function.queue),
- .lock = __SPIN_LOCK_UNLOCKED(call_function.lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock),
};
enum {
@@ -35,7 +35,7 @@ struct call_function_data {
struct call_single_queue {
struct list_head list;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
static DEFINE_PER_CPU(struct call_function_data, cfd_data);
@@ -80,7 +80,7 @@ static int __cpuinit init_call_single_data(void)
for_each_possible_cpu(i) {
struct call_single_queue *q = &per_cpu(call_single_queue, i);
- spin_lock_init(&q->lock);
+ raw_spin_lock_init(&q->lock);
INIT_LIST_HEAD(&q->list);
}
@@ -141,10 +141,10 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
unsigned long flags;
int ipi;
- spin_lock_irqsave(&dst->lock, flags);
+ raw_spin_lock_irqsave(&dst->lock, flags);
ipi = list_empty(&dst->list);
list_add_tail(&data->list, &dst->list);
- spin_unlock_irqrestore(&dst->lock, flags);
+ raw_spin_unlock_irqrestore(&dst->lock, flags);
/*
* The list addition should be visible before sending the IPI
@@ -171,7 +171,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
void generic_smp_call_function_interrupt(void)
{
struct call_function_data *data;
- int cpu = get_cpu();
+ int cpu = smp_processor_id();
/*
* Shouldn't receive this interrupt on a cpu that is not yet online.
@@ -201,9 +201,9 @@ void generic_smp_call_function_interrupt(void)
refs = atomic_dec_return(&data->refs);
WARN_ON(refs < 0);
if (!refs) {
- spin_lock(&call_function.lock);
+ raw_spin_lock(&call_function.lock);
list_del_rcu(&data->csd.list);
- spin_unlock(&call_function.lock);
+ raw_spin_unlock(&call_function.lock);
}
if (refs)
@@ -212,7 +212,6 @@ void generic_smp_call_function_interrupt(void)
csd_unlock(&data->csd);
}
- put_cpu();
}
/*
@@ -230,9 +229,9 @@ void generic_smp_call_function_single_interrupt(void)
*/
WARN_ON_ONCE(!cpu_online(smp_processor_id()));
- spin_lock(&q->lock);
+ raw_spin_lock(&q->lock);
list_replace_init(&q->list, &list);
- spin_unlock(&q->lock);
+ raw_spin_unlock(&q->lock);
while (!list_empty(&list)) {
struct call_single_data *data;
@@ -449,14 +448,14 @@ void smp_call_function_many(const struct cpumask *mask,
cpumask_clear_cpu(this_cpu, data->cpumask);
atomic_set(&data->refs, cpumask_weight(data->cpumask));
- spin_lock_irqsave(&call_function.lock, flags);
+ raw_spin_lock_irqsave(&call_function.lock, flags);
/*
* Place entry at the _HEAD_ of the list, so that any cpu still
* observing the entry in generic_smp_call_function_interrupt()
* will not miss any other list entries:
*/
list_add_rcu(&data->csd.list, &call_function.queue);
- spin_unlock_irqrestore(&call_function.lock, flags);
+ raw_spin_unlock_irqrestore(&call_function.lock, flags);
/*
* Make the list addition visible before sending the ipi.
@@ -501,20 +500,20 @@ EXPORT_SYMBOL(smp_call_function);
void ipi_call_lock(void)
{
- spin_lock(&call_function.lock);
+ raw_spin_lock(&call_function.lock);
}
void ipi_call_unlock(void)
{
- spin_unlock(&call_function.lock);
+ raw_spin_unlock(&call_function.lock);
}
void ipi_call_lock_irq(void)
{
- spin_lock_irq(&call_function.lock);
+ raw_spin_lock_irq(&call_function.lock);
}
void ipi_call_unlock_irq(void)
{
- spin_unlock_irq(&call_function.lock);
+ raw_spin_unlock_irq(&call_function.lock);
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 21939d9e830e..a09502e2ef75 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -697,7 +697,7 @@ void __init softirq_init(void)
open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
-static int ksoftirqd(void * __bind_cpu)
+static int run_ksoftirqd(void * __bind_cpu)
{
set_current_state(TASK_INTERRUPTIBLE);
@@ -810,7 +810,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+ p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
if (IS_ERR(p)) {
printk("ksoftirqd for %i failed\n", hotcpu);
return NOTIFY_BAD;
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 81324d12eb35..d22579087e27 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -22,9 +22,9 @@
static DEFINE_SPINLOCK(print_lock);
-static DEFINE_PER_CPU(unsigned long, touch_timestamp);
-static DEFINE_PER_CPU(unsigned long, print_timestamp);
-static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
+static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
+static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
+static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static int __read_mostly did_panic;
int __read_mostly softlockup_thresh = 60;
@@ -70,12 +70,12 @@ static void __touch_softlockup_watchdog(void)
{
int this_cpu = raw_smp_processor_id();
- __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu);
+ __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
}
void touch_softlockup_watchdog(void)
{
- __raw_get_cpu_var(touch_timestamp) = 0;
+ __raw_get_cpu_var(softlockup_touch_ts) = 0;
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
@@ -85,7 +85,7 @@ void touch_all_softlockup_watchdogs(void)
/* Cause each CPU to re-update its timestamp rather than complain */
for_each_online_cpu(cpu)
- per_cpu(touch_timestamp, cpu) = 0;
+ per_cpu(softlockup_touch_ts, cpu) = 0;
}
EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
@@ -104,28 +104,28 @@ int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
void softlockup_tick(void)
{
int this_cpu = smp_processor_id();
- unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
- unsigned long print_timestamp;
+ unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
+ unsigned long print_ts;
struct pt_regs *regs = get_irq_regs();
unsigned long now;
/* Is detection switched off? */
- if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
+ if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
/* Be sure we don't false trigger if switched back on */
- if (touch_timestamp)
- per_cpu(touch_timestamp, this_cpu) = 0;
+ if (touch_ts)
+ per_cpu(softlockup_touch_ts, this_cpu) = 0;
return;
}
- if (touch_timestamp == 0) {
+ if (touch_ts == 0) {
__touch_softlockup_watchdog();
return;
}
- print_timestamp = per_cpu(print_timestamp, this_cpu);
+ print_ts = per_cpu(softlockup_print_ts, this_cpu);
/* report at most once a second */
- if (print_timestamp == touch_timestamp || did_panic)
+ if (print_ts == touch_ts || did_panic)
return;
/* do not print during early bootup: */
@@ -140,18 +140,18 @@ void softlockup_tick(void)
* Wake up the high-prio watchdog task twice per
* threshold timespan.
*/
- if (now > touch_timestamp + softlockup_thresh/2)
- wake_up_process(per_cpu(watchdog_task, this_cpu));
+ if (now > touch_ts + softlockup_thresh/2)
+ wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
/* Warn about unreasonable delays: */
- if (now <= (touch_timestamp + softlockup_thresh))
+ if (now <= (touch_ts + softlockup_thresh))
return;
- per_cpu(print_timestamp, this_cpu) = touch_timestamp;
+ per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
spin_lock(&print_lock);
printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
- this_cpu, now - touch_timestamp,
+ this_cpu, now - touch_ts,
current->comm, task_pid_nr(current));
print_modules();
print_irqtrace_events(current);
@@ -209,32 +209,32 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- BUG_ON(per_cpu(watchdog_task, hotcpu));
+ BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
if (IS_ERR(p)) {
printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
return NOTIFY_BAD;
}
- per_cpu(touch_timestamp, hotcpu) = 0;
- per_cpu(watchdog_task, hotcpu) = p;
+ per_cpu(softlockup_touch_ts, hotcpu) = 0;
+ per_cpu(softlockup_watchdog, hotcpu) = p;
kthread_bind(p, hotcpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- wake_up_process(per_cpu(watchdog_task, hotcpu));
+ wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
- if (!per_cpu(watchdog_task, hotcpu))
+ if (!per_cpu(softlockup_watchdog, hotcpu))
break;
/* Unbind so it can run. Fall thru. */
- kthread_bind(per_cpu(watchdog_task, hotcpu),
+ kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
cpumask_any(cpu_online_mask));
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- p = per_cpu(watchdog_task, hotcpu);
- per_cpu(watchdog_task, hotcpu) = NULL;
+ p = per_cpu(softlockup_watchdog, hotcpu);
+ per_cpu(softlockup_watchdog, hotcpu) = NULL;
kthread_stop(p);
break;
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 41e042219ff6..be6517fb9c14 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -32,6 +32,8 @@
* include/linux/spinlock_api_smp.h
*/
#else
+#define raw_read_can_lock(l) read_can_lock(l)
+#define raw_write_can_lock(l) write_can_lock(l)
/*
* We build the __lock_function inlines here. They are too large for
* inlining all over the place, but here is only one user per function
@@ -42,49 +44,49 @@
* towards that other CPU that it should break the lock ASAP.
*/
#define BUILD_LOCK_OPS(op, locktype) \
-void __lockfunc __##op##_lock(locktype##_t *lock) \
+void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
{ \
for (;;) { \
preempt_disable(); \
- if (likely(_raw_##op##_trylock(lock))) \
+ if (likely(do_raw_##op##_trylock(lock))) \
break; \
preempt_enable(); \
\
if (!(lock)->break_lock) \
(lock)->break_lock = 1; \
- while (!op##_can_lock(lock) && (lock)->break_lock) \
- _raw_##op##_relax(&lock->raw_lock); \
+ while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
+ arch_##op##_relax(&lock->raw_lock); \
} \
(lock)->break_lock = 0; \
} \
\
-unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \
+unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
{ \
unsigned long flags; \
\
for (;;) { \
preempt_disable(); \
local_irq_save(flags); \
- if (likely(_raw_##op##_trylock(lock))) \
+ if (likely(do_raw_##op##_trylock(lock))) \
break; \
local_irq_restore(flags); \
preempt_enable(); \
\
if (!(lock)->break_lock) \
(lock)->break_lock = 1; \
- while (!op##_can_lock(lock) && (lock)->break_lock) \
- _raw_##op##_relax(&lock->raw_lock); \
+ while (!raw_##op##_can_lock(lock) && (lock)->break_lock)\
+ arch_##op##_relax(&lock->raw_lock); \
} \
(lock)->break_lock = 0; \
return flags; \
} \
\
-void __lockfunc __##op##_lock_irq(locktype##_t *lock) \
+void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \
{ \
- _##op##_lock_irqsave(lock); \
+ _raw_##op##_lock_irqsave(lock); \
} \
\
-void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
+void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
{ \
unsigned long flags; \
\
@@ -93,7 +95,7 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
/* irq-disabling. We use the generic preemption-aware */ \
/* function: */ \
/**/ \
- flags = _##op##_lock_irqsave(lock); \
+ flags = _raw_##op##_lock_irqsave(lock); \
local_bh_disable(); \
local_irq_restore(flags); \
} \
@@ -107,269 +109,269 @@ void __lockfunc __##op##_lock_bh(locktype##_t *lock) \
* __[spin|read|write]_lock_irqsave()
* __[spin|read|write]_lock_bh()
*/
-BUILD_LOCK_OPS(spin, spinlock);
+BUILD_LOCK_OPS(spin, raw_spinlock);
BUILD_LOCK_OPS(read, rwlock);
BUILD_LOCK_OPS(write, rwlock);
#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
-void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
+#ifndef CONFIG_INLINE_SPIN_TRYLOCK
+int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
{
- preempt_disable();
- spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+ return __raw_spin_trylock(lock);
}
-EXPORT_SYMBOL(_spin_lock_nested);
+EXPORT_SYMBOL(_raw_spin_trylock);
+#endif
-unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock,
- int subclass)
+#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
+int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
{
- unsigned long flags;
-
- local_irq_save(flags);
- preempt_disable();
- spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- LOCK_CONTENDED_FLAGS(lock, _raw_spin_trylock, _raw_spin_lock,
- _raw_spin_lock_flags, &flags);
- return flags;
+ return __raw_spin_trylock_bh(lock);
}
-EXPORT_SYMBOL(_spin_lock_irqsave_nested);
+EXPORT_SYMBOL(_raw_spin_trylock_bh);
+#endif
-void __lockfunc _spin_lock_nest_lock(spinlock_t *lock,
- struct lockdep_map *nest_lock)
+#ifndef CONFIG_INLINE_SPIN_LOCK
+void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
{
- preempt_disable();
- spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
- LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+ __raw_spin_lock(lock);
}
-EXPORT_SYMBOL(_spin_lock_nest_lock);
-
+EXPORT_SYMBOL(_raw_spin_lock);
#endif
-#ifndef CONFIG_INLINE_SPIN_TRYLOCK
-int __lockfunc _spin_trylock(spinlock_t *lock)
+#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
+unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
- return __spin_trylock(lock);
+ return __raw_spin_lock_irqsave(lock);
}
-EXPORT_SYMBOL(_spin_trylock);
+EXPORT_SYMBOL(_raw_spin_lock_irqsave);
#endif
-#ifndef CONFIG_INLINE_READ_TRYLOCK
-int __lockfunc _read_trylock(rwlock_t *lock)
+#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
+void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
{
- return __read_trylock(lock);
+ __raw_spin_lock_irq(lock);
}
-EXPORT_SYMBOL(_read_trylock);
+EXPORT_SYMBOL(_raw_spin_lock_irq);
#endif
-#ifndef CONFIG_INLINE_WRITE_TRYLOCK
-int __lockfunc _write_trylock(rwlock_t *lock)
+#ifndef CONFIG_INLINE_SPIN_LOCK_BH
+void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
{
- return __write_trylock(lock);
+ __raw_spin_lock_bh(lock);
}
-EXPORT_SYMBOL(_write_trylock);
+EXPORT_SYMBOL(_raw_spin_lock_bh);
#endif
-#ifndef CONFIG_INLINE_READ_LOCK
-void __lockfunc _read_lock(rwlock_t *lock)
+#ifndef CONFIG_INLINE_SPIN_UNLOCK
+void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
{
- __read_lock(lock);
+ __raw_spin_unlock(lock);
}
-EXPORT_SYMBOL(_read_lock);
+EXPORT_SYMBOL(_raw_spin_unlock);
#endif
-#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
-unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
+void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
{
- return __spin_lock_irqsave(lock);
+ __raw_spin_unlock_irqrestore(lock, flags);
}
-EXPORT_SYMBOL(_spin_lock_irqsave);
+EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
#endif
-#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
-void __lockfunc _spin_lock_irq(spinlock_t *lock)
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
+void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
{
- __spin_lock_irq(lock);
+ __raw_spin_unlock_irq(lock);
}
-EXPORT_SYMBOL(_spin_lock_irq);
+EXPORT_SYMBOL(_raw_spin_unlock_irq);
#endif
-#ifndef CONFIG_INLINE_SPIN_LOCK_BH
-void __lockfunc _spin_lock_bh(spinlock_t *lock)
+#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
+void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
{
- __spin_lock_bh(lock);
+ __raw_spin_unlock_bh(lock);
}
-EXPORT_SYMBOL(_spin_lock_bh);
+EXPORT_SYMBOL(_raw_spin_unlock_bh);
#endif
-#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
-unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
+#ifndef CONFIG_INLINE_READ_TRYLOCK
+int __lockfunc _raw_read_trylock(rwlock_t *lock)
{
- return __read_lock_irqsave(lock);
+ return __raw_read_trylock(lock);
}
-EXPORT_SYMBOL(_read_lock_irqsave);
+EXPORT_SYMBOL(_raw_read_trylock);
#endif
-#ifndef CONFIG_INLINE_READ_LOCK_IRQ
-void __lockfunc _read_lock_irq(rwlock_t *lock)
+#ifndef CONFIG_INLINE_READ_LOCK
+void __lockfunc _raw_read_lock(rwlock_t *lock)
{
- __read_lock_irq(lock);
+ __raw_read_lock(lock);
}
-EXPORT_SYMBOL(_read_lock_irq);
+EXPORT_SYMBOL(_raw_read_lock);
#endif
-#ifndef CONFIG_INLINE_READ_LOCK_BH
-void __lockfunc _read_lock_bh(rwlock_t *lock)
+#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
+unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
{
- __read_lock_bh(lock);
+ return __raw_read_lock_irqsave(lock);
}
-EXPORT_SYMBOL(_read_lock_bh);
+EXPORT_SYMBOL(_raw_read_lock_irqsave);
#endif
-#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
-unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
+#ifndef CONFIG_INLINE_READ_LOCK_IRQ
+void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
{
- return __write_lock_irqsave(lock);
+ __raw_read_lock_irq(lock);
}
-EXPORT_SYMBOL(_write_lock_irqsave);
+EXPORT_SYMBOL(_raw_read_lock_irq);
#endif
-#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
-void __lockfunc _write_lock_irq(rwlock_t *lock)
+#ifndef CONFIG_INLINE_READ_LOCK_BH
+void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
{
- __write_lock_irq(lock);
+ __raw_read_lock_bh(lock);
}
-EXPORT_SYMBOL(_write_lock_irq);
+EXPORT_SYMBOL(_raw_read_lock_bh);
#endif
-#ifndef CONFIG_INLINE_WRITE_LOCK_BH
-void __lockfunc _write_lock_bh(rwlock_t *lock)
+#ifndef CONFIG_INLINE_READ_UNLOCK
+void __lockfunc _raw_read_unlock(rwlock_t *lock)
{
- __write_lock_bh(lock);
+ __raw_read_unlock(lock);
}
-EXPORT_SYMBOL(_write_lock_bh);
+EXPORT_SYMBOL(_raw_read_unlock);
#endif
-#ifndef CONFIG_INLINE_SPIN_LOCK
-void __lockfunc _spin_lock(spinlock_t *lock)
+#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
+void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
- __spin_lock(lock);
+ __raw_read_unlock_irqrestore(lock, flags);
}
-EXPORT_SYMBOL(_spin_lock);
+EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
#endif
-#ifndef CONFIG_INLINE_WRITE_LOCK
-void __lockfunc _write_lock(rwlock_t *lock)
+#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
+void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
{
- __write_lock(lock);
+ __raw_read_unlock_irq(lock);
}
-EXPORT_SYMBOL(_write_lock);
+EXPORT_SYMBOL(_raw_read_unlock_irq);
#endif
-#ifndef CONFIG_INLINE_SPIN_UNLOCK
-void __lockfunc _spin_unlock(spinlock_t *lock)
+#ifndef CONFIG_INLINE_READ_UNLOCK_BH
+void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
{
- __spin_unlock(lock);
+ __raw_read_unlock_bh(lock);
}
-EXPORT_SYMBOL(_spin_unlock);
+EXPORT_SYMBOL(_raw_read_unlock_bh);
#endif
-#ifndef CONFIG_INLINE_WRITE_UNLOCK
-void __lockfunc _write_unlock(rwlock_t *lock)
+#ifndef CONFIG_INLINE_WRITE_TRYLOCK
+int __lockfunc _raw_write_trylock(rwlock_t *lock)
{
- __write_unlock(lock);
+ return __raw_write_trylock(lock);
}
-EXPORT_SYMBOL(_write_unlock);
+EXPORT_SYMBOL(_raw_write_trylock);
#endif
-#ifndef CONFIG_INLINE_READ_UNLOCK
-void __lockfunc _read_unlock(rwlock_t *lock)
+#ifndef CONFIG_INLINE_WRITE_LOCK
+void __lockfunc _raw_write_lock(rwlock_t *lock)
{
- __read_unlock(lock);
+ __raw_write_lock(lock);
}
-EXPORT_SYMBOL(_read_unlock);
+EXPORT_SYMBOL(_raw_write_lock);
#endif
-#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
-void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
+#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
+unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
{
- __spin_unlock_irqrestore(lock, flags);
+ return __raw_write_lock_irqsave(lock);
}
-EXPORT_SYMBOL(_spin_unlock_irqrestore);
+EXPORT_SYMBOL(_raw_write_lock_irqsave);
#endif
-#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
-void __lockfunc _spin_unlock_irq(spinlock_t *lock)
+#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
+void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
{
- __spin_unlock_irq(lock);
+ __raw_write_lock_irq(lock);
}
-EXPORT_SYMBOL(_spin_unlock_irq);
+EXPORT_SYMBOL(_raw_write_lock_irq);
#endif
-#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
-void __lockfunc _spin_unlock_bh(spinlock_t *lock)
+#ifndef CONFIG_INLINE_WRITE_LOCK_BH
+void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
{
- __spin_unlock_bh(lock);
+ __raw_write_lock_bh(lock);
}
-EXPORT_SYMBOL(_spin_unlock_bh);
+EXPORT_SYMBOL(_raw_write_lock_bh);
#endif
-#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
-void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+#ifndef CONFIG_INLINE_WRITE_UNLOCK
+void __lockfunc _raw_write_unlock(rwlock_t *lock)
{
- __read_unlock_irqrestore(lock, flags);
+ __raw_write_unlock(lock);
}
-EXPORT_SYMBOL(_read_unlock_irqrestore);
+EXPORT_SYMBOL(_raw_write_unlock);
#endif
-#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
-void __lockfunc _read_unlock_irq(rwlock_t *lock)
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
+void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
- __read_unlock_irq(lock);
+ __raw_write_unlock_irqrestore(lock, flags);
}
-EXPORT_SYMBOL(_read_unlock_irq);
+EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
#endif
-#ifndef CONFIG_INLINE_READ_UNLOCK_BH
-void __lockfunc _read_unlock_bh(rwlock_t *lock)
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
+void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
{
- __read_unlock_bh(lock);
+ __raw_write_unlock_irq(lock);
}
-EXPORT_SYMBOL(_read_unlock_bh);
+EXPORT_SYMBOL(_raw_write_unlock_irq);
#endif
-#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
-void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
+void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
{
- __write_unlock_irqrestore(lock, flags);
+ __raw_write_unlock_bh(lock);
}
-EXPORT_SYMBOL(_write_unlock_irqrestore);
+EXPORT_SYMBOL(_raw_write_unlock_bh);
#endif
-#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
-void __lockfunc _write_unlock_irq(rwlock_t *lock)
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
{
- __write_unlock_irq(lock);
+ preempt_disable();
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
-EXPORT_SYMBOL(_write_unlock_irq);
-#endif
+EXPORT_SYMBOL(_raw_spin_lock_nested);
-#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
-void __lockfunc _write_unlock_bh(rwlock_t *lock)
+unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
+ int subclass)
{
- __write_unlock_bh(lock);
+ unsigned long flags;
+
+ local_irq_save(flags);
+ preempt_disable();
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock,
+ do_raw_spin_lock_flags, &flags);
+ return flags;
}
-EXPORT_SYMBOL(_write_unlock_bh);
-#endif
+EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested);
-#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
-int __lockfunc _spin_trylock_bh(spinlock_t *lock)
+void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock,
+ struct lockdep_map *nest_lock)
{
- return __spin_trylock_bh(lock);
+ preempt_disable();
+ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
+ LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
-EXPORT_SYMBOL(_spin_trylock_bh);
+EXPORT_SYMBOL(_raw_spin_lock_nest_lock);
+
#endif
notrace int in_lock_functions(unsigned long addr)
diff --git a/kernel/sys.c b/kernel/sys.c
index bc1dc61c31ed..26a6b73a6b85 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -8,7 +8,6 @@
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
-#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
@@ -191,10 +190,10 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
!(user = find_user(who)))
goto out_unlock; /* No processes for this user */
- do_each_thread(g, p)
+ do_each_thread(g, p) {
if (__task_cred(p)->uid == who)
error = set_one_prio(p, niceval, error);
- while_each_thread(g, p);
+ } while_each_thread(g, p);
if (who != cred->uid)
free_uid(user); /* For find_user() */
break;
@@ -255,13 +254,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
!(user = find_user(who)))
goto out_unlock; /* No processes for this user */
- do_each_thread(g, p)
+ do_each_thread(g, p) {
if (__task_cred(p)->uid == who) {
niceval = 20 - task_nice(p);
if (niceval > retval)
retval = niceval;
}
- while_each_thread(g, p);
+ } while_each_thread(g, p);
if (who != cred->uid)
free_uid(user); /* for find_user() */
break;
@@ -351,6 +350,9 @@ void kernel_power_off(void)
machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
+
+static DEFINE_MUTEX(reboot_mutex);
+
/*
* Reboot system call: for obvious reasons only root may call it,
* and even root needs to set up some magic numbers in the registers
@@ -383,7 +385,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
cmd = LINUX_REBOOT_CMD_HALT;
- lock_kernel();
+ mutex_lock(&reboot_mutex);
switch (cmd) {
case LINUX_REBOOT_CMD_RESTART:
kernel_restart(NULL);
@@ -399,20 +401,18 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
case LINUX_REBOOT_CMD_HALT:
kernel_halt();
- unlock_kernel();
do_exit(0);
panic("cannot halt");
case LINUX_REBOOT_CMD_POWER_OFF:
kernel_power_off();
- unlock_kernel();
do_exit(0);
break;
case LINUX_REBOOT_CMD_RESTART2:
if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
- unlock_kernel();
- return -EFAULT;
+ ret = -EFAULT;
+ break;
}
buffer[sizeof(buffer) - 1] = '\0';
@@ -435,7 +435,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
ret = -EINVAL;
break;
}
- unlock_kernel();
+ mutex_unlock(&reboot_mutex);
return ret;
}
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e06d0b8d1951..695384f12a7d 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -48,8 +48,10 @@ cond_syscall(sys_shutdown);
cond_syscall(sys_sendmsg);
cond_syscall(compat_sys_sendmsg);
cond_syscall(sys_recvmsg);
+cond_syscall(sys_recvmmsg);
cond_syscall(compat_sys_recvmsg);
cond_syscall(compat_sys_recvfrom);
+cond_syscall(compat_sys_recvmmsg);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
cond_syscall(compat_sys_futex);
@@ -139,7 +141,6 @@ cond_syscall(sys_pciconfig_read);
cond_syscall(sys_pciconfig_write);
cond_syscall(sys_pciconfig_iobase);
cond_syscall(sys32_ipc);
-cond_syscall(sys32_sysctl);
cond_syscall(ppc_rtas);
cond_syscall(sys_spu_run);
cond_syscall(sys_spu_create);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dbf93a52ee9..8a68b2448468 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -27,7 +27,6 @@
#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/kmemcheck.h>
-#include <linux/smp_lock.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -61,7 +60,6 @@
#include <asm/io.h>
#endif
-static int deprecated_sysctl_warning(struct __sysctl_args *args);
#if defined(CONFIG_SYSCTL)
@@ -210,31 +208,26 @@ extern int lock_stat;
static struct ctl_table root_table[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = kern_table,
},
{
- .ctl_name = CTL_VM,
.procname = "vm",
.mode = 0555,
.child = vm_table,
},
{
- .ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = fs_table,
},
{
- .ctl_name = CTL_DEBUG,
.procname = "debug",
.mode = 0555,
.child = debug_table,
},
{
- .ctl_name = CTL_DEV,
.procname = "dev",
.mode = 0555,
.child = dev_table,
@@ -243,7 +236,7 @@ static struct ctl_table root_table[] = {
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
*/
- { .ctl_name = 0 }
+ { }
};
#ifdef CONFIG_SCHED_DEBUG
@@ -251,196 +244,178 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
static int min_wakeup_granularity_ns; /* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
+static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
+static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
+static int min_sched_shares_ratelimit = 100000; /* 100 usec */
+static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
#endif
static struct ctl_table kern_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_child_runs_first",
.data = &sysctl_sched_child_runs_first,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_SCHED_DEBUG
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_min_granularity_ns",
.data = &sysctl_sched_min_granularity,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &sched_nr_latency_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = sched_proc_update_handler,
.extra1 = &min_sched_granularity_ns,
.extra2 = &max_sched_granularity_ns,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_latency_ns",
.data = &sysctl_sched_latency,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &sched_nr_latency_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = sched_proc_update_handler,
.extra1 = &min_sched_granularity_ns,
.extra2 = &max_sched_granularity_ns,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_wakeup_granularity_ns",
.data = &sysctl_sched_wakeup_granularity,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = sched_proc_update_handler,
.extra1 = &min_wakeup_granularity_ns,
.extra2 = &max_wakeup_granularity_ns,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_shares_ratelimit",
.data = &sysctl_sched_shares_ratelimit,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_sched_shares_ratelimit,
+ .extra2 = &max_sched_shares_ratelimit,
},
{
- .ctl_name = CTL_UNNUMBERED,
- .procname = "sched_shares_thresh",
- .data = &sysctl_sched_shares_thresh,
- .maxlen = sizeof(unsigned int),
+ .procname = "sched_tunable_scaling",
+ .data = &sysctl_sched_tunable_scaling,
+ .maxlen = sizeof(enum sched_tunable_scaling),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &zero,
+ .proc_handler = sched_proc_update_handler,
+ .extra1 = &min_sched_tunable_scaling,
+ .extra2 = &max_sched_tunable_scaling,
},
{
- .ctl_name = CTL_UNNUMBERED,
- .procname = "sched_features",
- .data = &sysctl_sched_features,
+ .procname = "sched_shares_thresh",
+ .data = &sysctl_sched_shares_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_migration_cost",
.data = &sysctl_sched_migration_cost,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_nr_migrate",
.data = &sysctl_sched_nr_migrate,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_time_avg",
.data = &sysctl_sched_time_avg,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "timer_migration",
.data = &sysctl_timer_migration,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
#endif
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_rt_period_us",
.data = &sysctl_sched_rt_period,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &sched_rt_handler,
+ .proc_handler = sched_rt_handler,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_rt_runtime_us",
.data = &sysctl_sched_rt_runtime,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &sched_rt_handler,
+ .proc_handler = sched_rt_handler,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_compat_yield",
.data = &sysctl_sched_compat_yield,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_PROVE_LOCKING
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "prove_locking",
.data = &prove_locking,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_LOCK_STAT
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "lock_stat",
.data = &lock_stat,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
{
- .ctl_name = KERN_PANIC,
.procname = "panic",
.data = &panic_timeout,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = KERN_CORE_USES_PID,
.procname = "core_uses_pid",
.data = &core_uses_pid,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = KERN_CORE_PATTERN,
.procname = "core_pattern",
.data = core_pattern,
.maxlen = CORENAME_MAX_SIZE,
.mode = 0644,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string,
+ .proc_handler = proc_dostring,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "core_pipe_limit",
.data = &core_pipe_limit,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
.maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = &proc_taint,
+ .proc_handler = proc_taint,
},
#endif
#ifdef CONFIG_LATENCYTOP
@@ -449,181 +424,160 @@ static struct ctl_table kern_table[] = {
.data = &latencytop_enabled,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_BLK_DEV_INITRD
{
- .ctl_name = KERN_REALROOTDEV,
.procname = "real-root-dev",
.data = &real_root_dev,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "print-fatal-signals",
.data = &print_fatal_signals,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_SPARC
{
- .ctl_name = KERN_SPARC_REBOOT,
.procname = "reboot-cmd",
.data = reboot_command,
.maxlen = 256,
.mode = 0644,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string,
+ .proc_handler = proc_dostring,
},
{
- .ctl_name = KERN_SPARC_STOP_A,
.procname = "stop-a",
.data = &stop_a_enabled,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = KERN_SPARC_SCONS_PWROFF,
.procname = "scons-poweroff",
.data = &scons_pwroff,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_SPARC64
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "tsb-ratio",
.data = &sysctl_tsb_ratio,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef __hppa__
{
- .ctl_name = KERN_HPPA_PWRSW,
.procname = "soft-power",
.data = &pwrsw_enabled,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = KERN_HPPA_UNALIGNED,
.procname = "unaligned-trap",
.data = &unaligned_enabled,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
{
- .ctl_name = KERN_CTLALTDEL,
.procname = "ctrl-alt-del",
.data = &C_A_D,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_FUNCTION_TRACER
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "ftrace_enabled",
.data = &ftrace_enabled,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &ftrace_enable_sysctl,
+ .proc_handler = ftrace_enable_sysctl,
},
#endif
#ifdef CONFIG_STACK_TRACER
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "stack_tracer_enabled",
.data = &stack_tracer_enabled,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &stack_trace_sysctl,
+ .proc_handler = stack_trace_sysctl,
},
#endif
#ifdef CONFIG_TRACING
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "ftrace_dump_on_oops",
.data = &ftrace_dump_on_oops,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_MODULES
{
- .ctl_name = KERN_MODPROBE,
.procname = "modprobe",
.data = &modprobe_path,
.maxlen = KMOD_PATH_LEN,
.mode = 0644,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string,
+ .proc_handler = proc_dostring,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "modules_disabled",
.data = &modules_disabled,
.maxlen = sizeof(int),
.mode = 0644,
/* only handle a transition from default "0" to "1" */
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &one,
.extra2 = &one,
},
#endif
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
{
- .ctl_name = KERN_HOTPLUG,
.procname = "hotplug",
.data = &uevent_helper,
.maxlen = UEVENT_HELPER_PATH_LEN,
.mode = 0644,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string,
+ .proc_handler = proc_dostring,
},
#endif
#ifdef CONFIG_CHR_DEV_SG
{
- .ctl_name = KERN_SG_BIG_BUFF,
.procname = "sg-big-buff",
.data = &sg_big_buff,
.maxlen = sizeof (int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
{
- .ctl_name = KERN_ACCT,
.procname = "acct",
.data = &acct_parm,
.maxlen = 3*sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_MAGIC_SYSRQ
{
- .ctl_name = KERN_SYSRQ,
.procname = "sysrq",
.data = &__sysrq_enabled,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_PROC_SYSCTL
@@ -632,215 +586,188 @@ static struct ctl_table kern_table[] = {
.data = NULL,
.maxlen = sizeof (int),
.mode = 0600,
- .proc_handler = &proc_do_cad_pid,
+ .proc_handler = proc_do_cad_pid,
},
#endif
{
- .ctl_name = KERN_MAX_THREADS,
.procname = "threads-max",
.data = &max_threads,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = KERN_RANDOM,
.procname = "random",
.mode = 0555,
.child = random_table,
},
{
- .ctl_name = KERN_OVERFLOWUID,
.procname = "overflowuid",
.data = &overflowuid,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &minolduid,
.extra2 = &maxolduid,
},
{
- .ctl_name = KERN_OVERFLOWGID,
.procname = "overflowgid",
.data = &overflowgid,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &minolduid,
.extra2 = &maxolduid,
},
#ifdef CONFIG_S390
#ifdef CONFIG_MATHEMU
{
- .ctl_name = KERN_IEEE_EMULATION_WARNINGS,
.procname = "ieee_emulation_warnings",
.data = &sysctl_ieee_emulation_warnings,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
{
- .ctl_name = KERN_S390_USER_DEBUG_LOGGING,
.procname = "userprocess_debug",
.data = &sysctl_userprocess_debug,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
{
- .ctl_name = KERN_PIDMAX,
.procname = "pid_max",
.data = &pid_max,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &pid_max_min,
.extra2 = &pid_max_max,
},
{
- .ctl_name = KERN_PANIC_ON_OOPS,
.procname = "panic_on_oops",
.data = &panic_on_oops,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#if defined CONFIG_PRINTK
{
- .ctl_name = KERN_PRINTK,
.procname = "printk",
.data = &console_loglevel,
.maxlen = 4*sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = KERN_PRINTK_RATELIMIT,
.procname = "printk_ratelimit",
.data = &printk_ratelimit_state.interval,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
},
{
- .ctl_name = KERN_PRINTK_RATELIMIT_BURST,
.procname = "printk_ratelimit_burst",
.data = &printk_ratelimit_state.burst,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "printk_delay",
.data = &printk_delay_msec,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &ten_thousand,
},
#endif
{
- .ctl_name = KERN_NGROUPS_MAX,
.procname = "ngroups_max",
.data = &ngroups_max,
.maxlen = sizeof (int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
- .ctl_name = KERN_UNKNOWN_NMI_PANIC,
.procname = "unknown_nmi_panic",
.data = &unknown_nmi_panic,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.procname = "nmi_watchdog",
.data = &nmi_watchdog_enabled,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_nmi_enabled,
+ .proc_handler = proc_nmi_enabled,
},
#endif
#if defined(CONFIG_X86)
{
- .ctl_name = KERN_PANIC_ON_NMI,
.procname = "panic_on_unrecovered_nmi",
.data = &panic_on_unrecovered_nmi,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "panic_on_io_nmi",
.data = &panic_on_io_nmi,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = KERN_BOOTLOADER_TYPE,
.procname = "bootloader_type",
.data = &bootloader_type,
.maxlen = sizeof (int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "bootloader_version",
.data = &bootloader_version,
.maxlen = sizeof (int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "kstack_depth_to_print",
.data = &kstack_depth_to_print,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "io_delay_type",
.data = &io_delay_type,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#if defined(CONFIG_MMU)
{
- .ctl_name = KERN_RANDOMIZE,
.procname = "randomize_va_space",
.data = &randomize_va_space,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#if defined(CONFIG_S390) && defined(CONFIG_SMP)
{
- .ctl_name = KERN_SPIN_RETRY,
.procname = "spin_retry",
.data = &spin_retry,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
@@ -849,123 +776,104 @@ static struct ctl_table kern_table[] = {
.data = &acpi_realmode_flags,
.maxlen = sizeof (unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
},
#endif
#ifdef CONFIG_IA64
{
- .ctl_name = KERN_IA64_UNALIGNED,
.procname = "ignore-unaligned-usertrap",
.data = &no_unaligned_warning,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "unaligned-dump-stack",
.data = &unaligned_dump_stack,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "softlockup_panic",
.data = &softlockup_panic,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "softlockup_thresh",
.data = &softlockup_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dosoftlockup_thresh,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dosoftlockup_thresh,
.extra1 = &neg_one,
.extra2 = &sixty,
},
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_panic",
.data = &sysctl_hung_task_panic,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_check_count",
.data = &sysctl_hung_task_check_count,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_doulongvec_minmax,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_timeout_secs",
.data = &sysctl_hung_task_timeout_secs,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_dohung_task_timeout_secs,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dohung_task_timeout_secs,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_warnings",
.data = &sysctl_hung_task_warnings,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_doulongvec_minmax,
},
#endif
#ifdef CONFIG_COMPAT
{
- .ctl_name = KERN_COMPAT_LOG,
.procname = "compat-log",
.data = &compat_log,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_RT_MUTEXES
{
- .ctl_name = KERN_MAX_LOCK_DEPTH,
.procname = "max_lock_depth",
.data = &max_lock_depth,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "poweroff_cmd",
.data = &poweroff_cmd,
.maxlen = POWEROFF_CMD_PATH_LEN,
.mode = 0644,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string,
+ .proc_handler = proc_dostring,
},
#ifdef CONFIG_KEYS
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "keys",
.mode = 0555,
.child = key_sysctls,
@@ -973,17 +881,15 @@ static struct ctl_table kern_table[] = {
#endif
#ifdef CONFIG_RCU_TORTURE_TEST
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rcutorture_runnable",
.data = &rcutorture_runnable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_SLOW_WORK
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "slow-work",
.mode = 0555,
.child = slow_work_sysctls,
@@ -991,146 +897,127 @@ static struct ctl_table kern_table[] = {
#endif
#ifdef CONFIG_PERF_EVENTS
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "perf_event_paranoid",
.data = &sysctl_perf_event_paranoid,
.maxlen = sizeof(sysctl_perf_event_paranoid),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "perf_event_mlock_kb",
.data = &sysctl_perf_event_mlock,
.maxlen = sizeof(sysctl_perf_event_mlock),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "perf_event_max_sample_rate",
.data = &sysctl_perf_event_sample_rate,
.maxlen = sizeof(sysctl_perf_event_sample_rate),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_KMEMCHECK
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "kmemcheck",
.data = &kmemcheck_enabled,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_BLOCK
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "blk_iopoll",
.data = &blk_iopoll_enabled,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
*/
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table vm_table[] = {
{
- .ctl_name = VM_OVERCOMMIT_MEMORY,
.procname = "overcommit_memory",
.data = &sysctl_overcommit_memory,
.maxlen = sizeof(sysctl_overcommit_memory),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = VM_PANIC_ON_OOM,
.procname = "panic_on_oom",
.data = &sysctl_panic_on_oom,
.maxlen = sizeof(sysctl_panic_on_oom),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "oom_kill_allocating_task",
.data = &sysctl_oom_kill_allocating_task,
.maxlen = sizeof(sysctl_oom_kill_allocating_task),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "oom_dump_tasks",
.data = &sysctl_oom_dump_tasks,
.maxlen = sizeof(sysctl_oom_dump_tasks),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = VM_OVERCOMMIT_RATIO,
.procname = "overcommit_ratio",
.data = &sysctl_overcommit_ratio,
.maxlen = sizeof(sysctl_overcommit_ratio),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = VM_PAGE_CLUSTER,
.procname = "page-cluster",
.data = &page_cluster,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = VM_DIRTY_BACKGROUND,
.procname = "dirty_background_ratio",
.data = &dirty_background_ratio,
.maxlen = sizeof(dirty_background_ratio),
.mode = 0644,
- .proc_handler = &dirty_background_ratio_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = dirty_background_ratio_handler,
.extra1 = &zero,
.extra2 = &one_hundred,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "dirty_background_bytes",
.data = &dirty_background_bytes,
.maxlen = sizeof(dirty_background_bytes),
.mode = 0644,
- .proc_handler = &dirty_background_bytes_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = dirty_background_bytes_handler,
.extra1 = &one_ul,
},
{
- .ctl_name = VM_DIRTY_RATIO,
.procname = "dirty_ratio",
.data = &vm_dirty_ratio,
.maxlen = sizeof(vm_dirty_ratio),
.mode = 0644,
- .proc_handler = &dirty_ratio_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = dirty_ratio_handler,
.extra1 = &zero,
.extra2 = &one_hundred,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "dirty_bytes",
.data = &vm_dirty_bytes,
.maxlen = sizeof(vm_dirty_bytes),
.mode = 0644,
- .proc_handler = &dirty_bytes_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = dirty_bytes_handler,
.extra1 = &dirty_bytes_min,
},
{
@@ -1138,289 +1025,258 @@ static struct ctl_table vm_table[] = {
.data = &dirty_writeback_interval,
.maxlen = sizeof(dirty_writeback_interval),
.mode = 0644,
- .proc_handler = &dirty_writeback_centisecs_handler,
+ .proc_handler = dirty_writeback_centisecs_handler,
},
{
.procname = "dirty_expire_centisecs",
.data = &dirty_expire_interval,
.maxlen = sizeof(dirty_expire_interval),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = VM_NR_PDFLUSH_THREADS,
.procname = "nr_pdflush_threads",
.data = &nr_pdflush_threads,
.maxlen = sizeof nr_pdflush_threads,
.mode = 0444 /* read-only*/,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = VM_SWAPPINESS,
.procname = "swappiness",
.data = &vm_swappiness,
.maxlen = sizeof(vm_swappiness),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one_hundred,
},
#ifdef CONFIG_HUGETLB_PAGE
- {
+ {
.procname = "nr_hugepages",
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &hugetlb_sysctl_handler,
+ .proc_handler = hugetlb_sysctl_handler,
.extra1 = (void *)&hugetlb_zero,
.extra2 = (void *)&hugetlb_infinity,
- },
+ },
+#ifdef CONFIG_NUMA
+ {
+ .procname = "nr_hugepages_mempolicy",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &hugetlb_mempolicy_sysctl_handler,
+ .extra1 = (void *)&hugetlb_zero,
+ .extra2 = (void *)&hugetlb_infinity,
+ },
+#endif
{
- .ctl_name = VM_HUGETLB_GROUP,
.procname = "hugetlb_shm_group",
.data = &sysctl_hugetlb_shm_group,
.maxlen = sizeof(gid_t),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hugepages_treat_as_movable",
.data = &hugepages_treat_as_movable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &hugetlb_treat_movable_handler,
+ .proc_handler = hugetlb_treat_movable_handler,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nr_overcommit_hugepages",
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &hugetlb_overcommit_handler,
+ .proc_handler = hugetlb_overcommit_handler,
.extra1 = (void *)&hugetlb_zero,
.extra2 = (void *)&hugetlb_infinity,
},
#endif
{
- .ctl_name = VM_LOWMEM_RESERVE_RATIO,
.procname = "lowmem_reserve_ratio",
.data = &sysctl_lowmem_reserve_ratio,
.maxlen = sizeof(sysctl_lowmem_reserve_ratio),
.mode = 0644,
- .proc_handler = &lowmem_reserve_ratio_sysctl_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = lowmem_reserve_ratio_sysctl_handler,
},
{
- .ctl_name = VM_DROP_PAGECACHE,
.procname = "drop_caches",
.data = &sysctl_drop_caches,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = drop_caches_sysctl_handler,
- .strategy = &sysctl_intvec,
},
{
- .ctl_name = VM_MIN_FREE_KBYTES,
.procname = "min_free_kbytes",
.data = &min_free_kbytes,
.maxlen = sizeof(min_free_kbytes),
.mode = 0644,
- .proc_handler = &min_free_kbytes_sysctl_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = min_free_kbytes_sysctl_handler,
.extra1 = &zero,
},
{
- .ctl_name = VM_PERCPU_PAGELIST_FRACTION,
.procname = "percpu_pagelist_fraction",
.data = &percpu_pagelist_fraction,
.maxlen = sizeof(percpu_pagelist_fraction),
.mode = 0644,
- .proc_handler = &percpu_pagelist_fraction_sysctl_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = percpu_pagelist_fraction_sysctl_handler,
.extra1 = &min_percpu_pagelist_fract,
},
#ifdef CONFIG_MMU
{
- .ctl_name = VM_MAX_MAP_COUNT,
.procname = "max_map_count",
.data = &sysctl_max_map_count,
.maxlen = sizeof(sysctl_max_map_count),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
#else
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nr_trim_pages",
.data = &sysctl_nr_trim_pages,
.maxlen = sizeof(sysctl_nr_trim_pages),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
#endif
{
- .ctl_name = VM_LAPTOP_MODE,
.procname = "laptop_mode",
.data = &laptop_mode,
.maxlen = sizeof(laptop_mode),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
},
{
- .ctl_name = VM_BLOCK_DUMP,
.procname = "block_dump",
.data = &block_dump,
.maxlen = sizeof(block_dump),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
.extra1 = &zero,
},
{
- .ctl_name = VM_VFS_CACHE_PRESSURE,
.procname = "vfs_cache_pressure",
.data = &sysctl_vfs_cache_pressure,
.maxlen = sizeof(sysctl_vfs_cache_pressure),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
.extra1 = &zero,
},
#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
{
- .ctl_name = VM_LEGACY_VA_LAYOUT,
.procname = "legacy_va_layout",
.data = &sysctl_legacy_va_layout,
.maxlen = sizeof(sysctl_legacy_va_layout),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
.extra1 = &zero,
},
#endif
#ifdef CONFIG_NUMA
{
- .ctl_name = VM_ZONE_RECLAIM_MODE,
.procname = "zone_reclaim_mode",
.data = &zone_reclaim_mode,
.maxlen = sizeof(zone_reclaim_mode),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
.extra1 = &zero,
},
{
- .ctl_name = VM_MIN_UNMAPPED,
.procname = "min_unmapped_ratio",
.data = &sysctl_min_unmapped_ratio,
.maxlen = sizeof(sysctl_min_unmapped_ratio),
.mode = 0644,
- .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
.extra1 = &zero,
.extra2 = &one_hundred,
},
{
- .ctl_name = VM_MIN_SLAB,
.procname = "min_slab_ratio",
.data = &sysctl_min_slab_ratio,
.maxlen = sizeof(sysctl_min_slab_ratio),
.mode = 0644,
- .proc_handler = &sysctl_min_slab_ratio_sysctl_handler,
- .strategy = &sysctl_intvec,
+ .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
.extra1 = &zero,
.extra2 = &one_hundred,
},
#endif
#ifdef CONFIG_SMP
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "stat_interval",
.data = &sysctl_stat_interval,
.maxlen = sizeof(sysctl_stat_interval),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = proc_dointvec_jiffies,
},
#endif
+#ifdef CONFIG_MMU
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "mmap_min_addr",
.data = &dac_mmap_min_addr,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &mmap_min_addr_handler,
+ .proc_handler = mmap_min_addr_handler,
},
+#endif
#ifdef CONFIG_NUMA
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "numa_zonelist_order",
.data = &numa_zonelist_order,
.maxlen = NUMA_ZONELIST_ORDER_LEN,
.mode = 0644,
- .proc_handler = &numa_zonelist_order_handler,
- .strategy = &sysctl_string,
+ .proc_handler = numa_zonelist_order_handler,
},
#endif
#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
(defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
{
- .ctl_name = VM_VDSO_ENABLED,
.procname = "vdso_enabled",
.data = &vdso_enabled,
.maxlen = sizeof(vdso_enabled),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
.extra1 = &zero,
},
#endif
#ifdef CONFIG_HIGHMEM
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "highmem_is_dirtyable",
.data = &vm_highmem_is_dirtyable,
.maxlen = sizeof(vm_highmem_is_dirtyable),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
#endif
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "scan_unevictable_pages",
.data = &scan_unevictable_pages,
.maxlen = sizeof(scan_unevictable_pages),
.mode = 0644,
- .proc_handler = &scan_unevictable_handler,
+ .proc_handler = scan_unevictable_handler,
},
#ifdef CONFIG_MEMORY_FAILURE
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "memory_failure_early_kill",
.data = &sysctl_memory_failure_early_kill,
.maxlen = sizeof(sysctl_memory_failure_early_kill),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "memory_failure_recovery",
.data = &sysctl_memory_failure_recovery,
.maxlen = sizeof(sysctl_memory_failure_recovery),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
@@ -1430,116 +1286,104 @@ static struct ctl_table vm_table[] = {
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
*/
- { .ctl_name = 0 }
+ { }
};
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
static struct ctl_table binfmt_misc_table[] = {
- { .ctl_name = 0 }
+ { }
};
#endif
static struct ctl_table fs_table[] = {
{
- .ctl_name = FS_NRINODE,
.procname = "inode-nr",
.data = &inodes_stat,
.maxlen = 2*sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_STATINODE,
.procname = "inode-state",
.data = &inodes_stat,
.maxlen = 7*sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
.procname = "file-nr",
.data = &files_stat,
.maxlen = 3*sizeof(int),
.mode = 0444,
- .proc_handler = &proc_nr_files,
+ .proc_handler = proc_nr_files,
},
{
- .ctl_name = FS_MAXFILE,
.procname = "file-max",
.data = &files_stat.max_files,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "nr_open",
.data = &sysctl_nr_open,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &sysctl_nr_open_min,
.extra2 = &sysctl_nr_open_max,
},
{
- .ctl_name = FS_DENTRY,
.procname = "dentry-state",
.data = &dentry_stat,
.maxlen = 6*sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = FS_OVERFLOWUID,
.procname = "overflowuid",
.data = &fs_overflowuid,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &minolduid,
.extra2 = &maxolduid,
},
{
- .ctl_name = FS_OVERFLOWGID,
.procname = "overflowgid",
.data = &fs_overflowgid,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &minolduid,
.extra2 = &maxolduid,
},
#ifdef CONFIG_FILE_LOCKING
{
- .ctl_name = FS_LEASES,
.procname = "leases-enable",
.data = &leases_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_DNOTIFY
{
- .ctl_name = FS_DIR_NOTIFY,
.procname = "dir-notify-enable",
.data = &dir_notify_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_MMU
#ifdef CONFIG_FILE_LOCKING
{
- .ctl_name = FS_LEASE_TIME,
.procname = "lease-break-time",
.data = &lease_break_time,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_AIO
@@ -1548,19 +1392,18 @@ static struct ctl_table fs_table[] = {
.data = &aio_nr,
.maxlen = sizeof(aio_nr),
.mode = 0444,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "aio-max-nr",
.data = &aio_max_nr,
.maxlen = sizeof(aio_max_nr),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
{
- .ctl_name = FS_INOTIFY,
.procname = "inotify",
.mode = 0555,
.child = inotify_table,
@@ -1575,19 +1418,16 @@ static struct ctl_table fs_table[] = {
#endif
#endif
{
- .ctl_name = KERN_SETUID_DUMPABLE,
.procname = "suid_dumpable",
.data = &suid_dumpable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &two,
},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "binfmt_misc",
.mode = 0555,
.child = binfmt_misc_table,
@@ -1597,13 +1437,12 @@ static struct ctl_table fs_table[] = {
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
*/
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table debug_table[] = {
#if defined(CONFIG_X86) || defined(CONFIG_PPC)
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "exception-trace",
.data = &show_unhandled_signals,
.maxlen = sizeof(int),
@@ -1611,11 +1450,11 @@ static struct ctl_table debug_table[] = {
.proc_handler = proc_dointvec
},
#endif
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table dev_table[] = {
- { .ctl_name = 0 }
+ { }
};
static DEFINE_SPINLOCK(sysctl_lock);
@@ -1769,122 +1608,6 @@ void register_sysctl_root(struct ctl_table_root *root)
spin_unlock(&sysctl_lock);
}
-#ifdef CONFIG_SYSCTL_SYSCALL
-/* Perform the actual read/write of a sysctl table entry. */
-static int do_sysctl_strategy(struct ctl_table_root *root,
- struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- int op = 0, rc;
-
- if (oldval)
- op |= MAY_READ;
- if (newval)
- op |= MAY_WRITE;
- if (sysctl_perm(root, table, op))
- return -EPERM;
-
- if (table->strategy) {
- rc = table->strategy(table, oldval, oldlenp, newval, newlen);
- if (rc < 0)
- return rc;
- if (rc > 0)
- return 0;
- }
-
- /* If there is no strategy routine, or if the strategy returns
- * zero, proceed with automatic r/w */
- if (table->data && table->maxlen) {
- rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
- if (rc < 0)
- return rc;
- }
- return 0;
-}
-
-static int parse_table(int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
- struct ctl_table_root *root,
- struct ctl_table *table)
-{
- int n;
-repeat:
- if (!nlen)
- return -ENOTDIR;
- if (get_user(n, name))
- return -EFAULT;
- for ( ; table->ctl_name || table->procname; table++) {
- if (!table->ctl_name)
- continue;
- if (n == table->ctl_name) {
- int error;
- if (table->child) {
- if (sysctl_perm(root, table, MAY_EXEC))
- return -EPERM;
- name++;
- nlen--;
- table = table->child;
- goto repeat;
- }
- error = do_sysctl_strategy(root, table,
- oldval, oldlenp,
- newval, newlen);
- return error;
- }
- }
- return -ENOTDIR;
-}
-
-int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- struct ctl_table_header *head;
- int error = -ENOTDIR;
-
- if (nlen <= 0 || nlen >= CTL_MAXNAME)
- return -ENOTDIR;
- if (oldval) {
- int old_len;
- if (!oldlenp || get_user(old_len, oldlenp))
- return -EFAULT;
- }
-
- for (head = sysctl_head_next(NULL); head;
- head = sysctl_head_next(head)) {
- error = parse_table(name, nlen, oldval, oldlenp,
- newval, newlen,
- head->root, head->ctl_table);
- if (error != -ENOTDIR) {
- sysctl_head_finish(head);
- break;
- }
- }
- return error;
-}
-
-SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
-{
- struct __sysctl_args tmp;
- int error;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- error = deprecated_sysctl_warning(&tmp);
- if (error)
- goto out;
-
- lock_kernel();
- error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
- tmp.newval, tmp.newlen);
- unlock_kernel();
-out:
- return error;
-}
-#endif /* CONFIG_SYSCTL_SYSCALL */
-
/*
* sysctl_perm does NOT grant the superuser all rights automatically, because
* some sysctl variables are readonly even to root.
@@ -1920,7 +1643,7 @@ int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
{
- for (; table->ctl_name || table->procname; table++) {
+ for (; table->procname; table++) {
table->parent = parent;
if (table->child)
sysctl_set_parent(table, table->child);
@@ -1952,11 +1675,11 @@ static struct ctl_table *is_branch_in(struct ctl_table *branch,
return NULL;
/* ... and nothing else */
- if (branch[1].procname || branch[1].ctl_name)
+ if (branch[1].procname)
return NULL;
/* table should contain subdirectory with the same name */
- for (p = table; p->procname || p->ctl_name; p++) {
+ for (p = table; p->procname; p++) {
if (!p->child)
continue;
if (p->procname && strcmp(p->procname, s) == 0)
@@ -2001,9 +1724,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
*
* The members of the &struct ctl_table structure are used as follows:
*
- * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
- * must be unique within that level of sysctl
- *
* procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
* enter a sysctl file
*
@@ -2018,8 +1738,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
*
* proc_handler - the text handler routine (described below)
*
- * strategy - the strategy routine (described below)
- *
* de - for internal use by the sysctl routines
*
* extra1, extra2 - extra pointers usable by the proc handler routines
@@ -2032,19 +1750,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
* struct enable minimal validation of the values being written to be
* performed, and the mode field allows minimal authentication.
*
- * More sophisticated management can be enabled by the provision of a
- * strategy routine with the table entry. This will be called before
- * any automatic read or write of the data is performed.
- *
- * The strategy routine may return
- *
- * < 0 - Error occurred (error is passed to user process)
- *
- * 0 - OK - proceed with automatic read or write.
- *
- * > 0 - OK - read or write has been done by the strategy routine, so
- * return immediately.
- *
* There must be a proc_handler routine for any terminal nodes
* mirrored under /proc/sys (non-terminals are handled by a built-in
* directory handler). Several default handlers are available to
@@ -2071,13 +1776,13 @@ struct ctl_table_header *__register_sysctl_paths(
struct ctl_table_set *set;
/* Count the path components */
- for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
+ for (npath = 0; path[npath].procname; ++npath)
;
/*
* For each path component, allocate a 2-element ctl_table array.
* The first array element will be filled with the sysctl entry
- * for this, the second will be the sentinel (ctl_name == 0).
+ * for this, the second will be the sentinel (procname == NULL).
*
* We allocate everything in one go so that we don't have to
* worry about freeing additional memory in unregister_sysctl_table.
@@ -2094,7 +1799,6 @@ struct ctl_table_header *__register_sysctl_paths(
for (n = 0; n < npath; ++n, ++path) {
/* Copy the procname */
new->procname = path->procname;
- new->ctl_name = path->ctl_name;
new->mode = 0555;
*prevp = new;
@@ -2956,286 +2660,6 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
#endif /* CONFIG_PROC_FS */
-
-#ifdef CONFIG_SYSCTL_SYSCALL
-/*
- * General sysctl support routines
- */
-
-/* The generic sysctl data routine (used if no strategy routine supplied) */
-int sysctl_data(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- size_t len;
-
- /* Get out of I don't have a variable */
- if (!table->data || !table->maxlen)
- return -ENOTDIR;
-
- if (oldval && oldlenp) {
- if (get_user(len, oldlenp))
- return -EFAULT;
- if (len) {
- if (len > table->maxlen)
- len = table->maxlen;
- if (copy_to_user(oldval, table->data, len))
- return -EFAULT;
- if (put_user(len, oldlenp))
- return -EFAULT;
- }
- }
-
- if (newval && newlen) {
- if (newlen > table->maxlen)
- newlen = table->maxlen;
-
- if (copy_from_user(table->data, newval, newlen))
- return -EFAULT;
- }
- return 1;
-}
-
-/* The generic string strategy routine: */
-int sysctl_string(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- if (!table->data || !table->maxlen)
- return -ENOTDIR;
-
- if (oldval && oldlenp) {
- size_t bufsize;
- if (get_user(bufsize, oldlenp))
- return -EFAULT;
- if (bufsize) {
- size_t len = strlen(table->data), copied;
-
- /* This shouldn't trigger for a well-formed sysctl */
- if (len > table->maxlen)
- len = table->maxlen;
-
- /* Copy up to a max of bufsize-1 bytes of the string */
- copied = (len >= bufsize) ? bufsize - 1 : len;
-
- if (copy_to_user(oldval, table->data, copied) ||
- put_user(0, (char __user *)(oldval + copied)))
- return -EFAULT;
- if (put_user(len, oldlenp))
- return -EFAULT;
- }
- }
- if (newval && newlen) {
- size_t len = newlen;
- if (len > table->maxlen)
- len = table->maxlen;
- if(copy_from_user(table->data, newval, len))
- return -EFAULT;
- if (len == table->maxlen)
- len--;
- ((char *) table->data)[len] = 0;
- }
- return 1;
-}
-
-/*
- * This function makes sure that all of the integers in the vector
- * are between the minimum and maximum values given in the arrays
- * table->extra1 and table->extra2, respectively.
- */
-int sysctl_intvec(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
-
- if (newval && newlen) {
- int __user *vec = (int __user *) newval;
- int *min = (int *) table->extra1;
- int *max = (int *) table->extra2;
- size_t length;
- int i;
-
- if (newlen % sizeof(int) != 0)
- return -EINVAL;
-
- if (!table->extra1 && !table->extra2)
- return 0;
-
- if (newlen > table->maxlen)
- newlen = table->maxlen;
- length = newlen / sizeof(int);
-
- for (i = 0; i < length; i++) {
- int value;
- if (get_user(value, vec + i))
- return -EFAULT;
- if (min && value < min[i])
- return -EINVAL;
- if (max && value > max[i])
- return -EINVAL;
- }
- }
- return 0;
-}
-
-/* Strategy function to convert jiffies to seconds */
-int sysctl_jiffies(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- if (oldval && oldlenp) {
- size_t olen;
-
- if (get_user(olen, oldlenp))
- return -EFAULT;
- if (olen) {
- int val;
-
- if (olen < sizeof(int))
- return -EINVAL;
-
- val = *(int *)(table->data) / HZ;
- if (put_user(val, (int __user *)oldval))
- return -EFAULT;
- if (put_user(sizeof(int), oldlenp))
- return -EFAULT;
- }
- }
- if (newval && newlen) {
- int new;
- if (newlen != sizeof(int))
- return -EINVAL;
- if (get_user(new, (int __user *)newval))
- return -EFAULT;
- *(int *)(table->data) = new*HZ;
- }
- return 1;
-}
-
-/* Strategy function to convert jiffies to seconds */
-int sysctl_ms_jiffies(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- if (oldval && oldlenp) {
- size_t olen;
-
- if (get_user(olen, oldlenp))
- return -EFAULT;
- if (olen) {
- int val;
-
- if (olen < sizeof(int))
- return -EINVAL;
-
- val = jiffies_to_msecs(*(int *)(table->data));
- if (put_user(val, (int __user *)oldval))
- return -EFAULT;
- if (put_user(sizeof(int), oldlenp))
- return -EFAULT;
- }
- }
- if (newval && newlen) {
- int new;
- if (newlen != sizeof(int))
- return -EINVAL;
- if (get_user(new, (int __user *)newval))
- return -EFAULT;
- *(int *)(table->data) = msecs_to_jiffies(new);
- }
- return 1;
-}
-
-
-
-#else /* CONFIG_SYSCTL_SYSCALL */
-
-
-SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
-{
- struct __sysctl_args tmp;
- int error;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- error = deprecated_sysctl_warning(&tmp);
-
- /* If no error reading the parameters then just -ENOSYS ... */
- if (!error)
- error = -ENOSYS;
-
- return error;
-}
-
-int sysctl_data(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
-int sysctl_string(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
-int sysctl_intvec(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
-int sysctl_jiffies(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
-int sysctl_ms_jiffies(struct ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
-#endif /* CONFIG_SYSCTL_SYSCALL */
-
-static int deprecated_sysctl_warning(struct __sysctl_args *args)
-{
- static int msg_count;
- int name[CTL_MAXNAME];
- int i;
-
- /* Check args->nlen. */
- if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
- return -ENOTDIR;
-
- /* Read in the sysctl name for better debug message logging */
- for (i = 0; i < args->nlen; i++)
- if (get_user(name[i], args->name + i))
- return -EFAULT;
-
- /* Ignore accesses to kernel.version */
- if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
- return 0;
-
- if (msg_count < 5) {
- msg_count++;
- printk(KERN_INFO
- "warning: process `%s' used the deprecated sysctl "
- "system call with ", current->comm);
- for (i = 0; i < args->nlen; i++)
- printk("%d.", name[i]);
- printk("\n");
- }
- return 0;
-}
-
/*
* No sense putting this after each symbol definition, twice,
* exception granted :-)
@@ -3250,9 +2674,4 @@ EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
-EXPORT_SYMBOL(sysctl_intvec);
-EXPORT_SYMBOL(sysctl_jiffies);
-EXPORT_SYMBOL(sysctl_ms_jiffies);
-EXPORT_SYMBOL(sysctl_string);
-EXPORT_SYMBOL(sysctl_data);
EXPORT_SYMBOL(unregister_sysctl_table);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
new file mode 100644
index 000000000000..112533d5fc08
--- /dev/null
+++ b/kernel/sysctl_binary.c
@@ -0,0 +1,1514 @@
+#include <linux/stat.h>
+#include <linux/sysctl.h>
+#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include <linux/sunrpc/debug.h>
+#include <linux/string.h>
+#include <net/ip_vs.h>
+#include <linux/syscalls.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
+#include <linux/nsproxy.h>
+#include <linux/pid_namespace.h>
+#include <linux/file.h>
+#include <linux/ctype.h>
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_SYSCTL_SYSCALL
+
+struct bin_table; /* forward declaration; the struct itself is defined further down */
+typedef ssize_t bin_convert_t(struct file *file, /* one signature shared by every bin_* handler */
+ void __user *oldval, size_t oldlen, void __user *newval, size_t newlen);
+
+static bin_convert_t bin_dir; /* handlers are only declared here, not defined */
+static bin_convert_t bin_string;
+static bin_convert_t bin_intvec;
+static bin_convert_t bin_ulongvec;
+static bin_convert_t bin_uuid;
+static bin_convert_t bin_dn_node_address;
+
+#define CTL_DIR bin_dir /* CTL_* are table shorthand for the handlers declared above */
+#define CTL_STR bin_string
+#define CTL_INT bin_intvec
+#define CTL_ULONG bin_ulongvec
+#define CTL_UUID bin_uuid
+#define CTL_DNADR bin_dn_node_address
+
+#define BUFSZ 256
+
+struct bin_table { /* one row of a binary-sysctl name table; tables nest through child */
+ bin_convert_t *convert; /* handler for this row, written as a CTL_* alias in the tables */
+ int ctl_name; /* numeric name, e.g. KERN_OSTYPE */
+ const char *procname; /* string name paired with ctl_name; presumably the /proc/sys entry name -- confirm */
+ const struct bin_table *child; /* sub-table; only given on CTL_DIR rows in the tables below */
+};
+
+static const struct bin_table bin_random_table[] = { /* child table of the "random" row in bin_kern_table */
+ { CTL_INT, RANDOM_POOLSIZE, "poolsize" },
+ { CTL_INT, RANDOM_ENTROPY_COUNT, "entropy_avail" },
+ { CTL_INT, RANDOM_READ_THRESH, "read_wakeup_threshold" },
+ { CTL_INT, RANDOM_WRITE_THRESH, "write_wakeup_threshold" },
+ { CTL_UUID, RANDOM_BOOT_ID, "boot_id" },
+ { CTL_UUID, RANDOM_UUID, "uuid" },
+ {} /* empty last entry */
+};
+
+static const struct bin_table bin_pty_table[] = { /* child table of the "pty" row in bin_kern_table */
+ { CTL_INT, PTY_MAX, "max" },
+ { CTL_INT, PTY_NR, "nr" },
+ {} /* empty last entry */
+};
+
+static const struct bin_table bin_kern_table[] = { /* rows: { handler alias, numeric name, string name [, child table] } */
+ { CTL_STR, KERN_OSTYPE, "ostype" },
+ { CTL_STR, KERN_OSRELEASE, "osrelease" },
+ /* KERN_OSREV not used */
+ { CTL_STR, KERN_VERSION, "version" },
+ /* KERN_SECUREMASK not used */
+ /* KERN_PROF not used */
+ { CTL_STR, KERN_NODENAME, "hostname" },
+ { CTL_STR, KERN_DOMAINNAME, "domainname" },
+
+ { CTL_INT, KERN_PANIC, "panic" },
+ { CTL_INT, KERN_REALROOTDEV, "real-root-dev" },
+
+ { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" },
+ { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" },
+ { CTL_INT, KERN_PRINTK, "printk" },
+
+ /* KERN_NAMETRANS not used */
+ /* KERN_PPC_HTABRECLAIM not used */
+ /* KERN_PPC_ZEROPAGED not used */
+ { CTL_INT, KERN_PPC_POWERSAVE_NAP, "powersave-nap" },
+
+ { CTL_STR, KERN_MODPROBE, "modprobe" },
+ { CTL_INT, KERN_SG_BIG_BUFF, "sg-big-buff" },
+ { CTL_INT, KERN_ACCT, "acct" },
+ /* KERN_PPC_L2CR "l2cr" no longer used */
+
+ /* KERN_RTSIGNR not used */
+ /* KERN_RTSIGMAX not used */
+
+ { CTL_ULONG, KERN_SHMMAX, "shmmax" },
+ { CTL_INT, KERN_MSGMAX, "msgmax" },
+ { CTL_INT, KERN_MSGMNB, "msgmnb" },
+ /* KERN_MSGPOOL not used */
+ { CTL_INT, KERN_SYSRQ, "sysrq" },
+ { CTL_INT, KERN_MAX_THREADS, "threads-max" },
+ { CTL_DIR, KERN_RANDOM, "random", bin_random_table }, /* directory row: descends into bin_random_table */
+ { CTL_ULONG, KERN_SHMALL, "shmall" },
+ { CTL_INT, KERN_MSGMNI, "msgmni" },
+ { CTL_INT, KERN_SEM, "sem" },
+ { CTL_INT, KERN_SPARC_STOP_A, "stop-a" },
+ { CTL_INT, KERN_SHMMNI, "shmmni" },
+
+ { CTL_INT, KERN_OVERFLOWUID, "overflowuid" },
+ { CTL_INT, KERN_OVERFLOWGID, "overflowgid" },
+
+ { CTL_STR, KERN_HOTPLUG, "hotplug", },
+ { CTL_INT, KERN_IEEE_EMULATION_WARNINGS, "ieee_emulation_warnings" },
+
+ { CTL_INT, KERN_S390_USER_DEBUG_LOGGING, "userprocess_debug" },
+ { CTL_INT, KERN_CORE_USES_PID, "core_uses_pid" },
+ /* KERN_TAINTED "tainted" no longer used */
+ { CTL_INT, KERN_CADPID, "cad_pid" },
+ { CTL_INT, KERN_PIDMAX, "pid_max" },
+ { CTL_STR, KERN_CORE_PATTERN, "core_pattern" },
+ { CTL_INT, KERN_PANIC_ON_OOPS, "panic_on_oops" },
+ { CTL_INT, KERN_HPPA_PWRSW, "soft-power" },
+ { CTL_INT, KERN_HPPA_UNALIGNED, "unaligned-trap" },
+
+ { CTL_INT, KERN_PRINTK_RATELIMIT, "printk_ratelimit" },
+ { CTL_INT, KERN_PRINTK_RATELIMIT_BURST, "printk_ratelimit_burst" },
+
+ { CTL_DIR, KERN_PTY, "pty", bin_pty_table }, /* directory row: descends into bin_pty_table */
+ { CTL_INT, KERN_NGROUPS_MAX, "ngroups_max" },
+ { CTL_INT, KERN_SPARC_SCONS_PWROFF, "scons-poweroff" },
+ /* KERN_HZ_TIMER "hz_timer" no longer used */
+ { CTL_INT, KERN_UNKNOWN_NMI_PANIC, "unknown_nmi_panic" },
+ { CTL_INT, KERN_BOOTLOADER_TYPE, "bootloader_type" },
+ { CTL_INT, KERN_RANDOMIZE, "randomize_va_space" },
+
+ { CTL_INT, KERN_SPIN_RETRY, "spin_retry" },
+ /* KERN_ACPI_VIDEO_FLAGS "acpi_video_flags" no longer used */
+ { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" },
+ { CTL_INT, KERN_COMPAT_LOG, "compat-log" },
+ { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
+ { CTL_INT, KERN_NMI_WATCHDOG, "nmi_watchdog" },
+ { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
+ {} /* empty last entry */
+};
+
+static const struct bin_table bin_vm_table[] = { /* rows: { handler alias, numeric name, string name }; names in comments have no row */
+ { CTL_INT, VM_OVERCOMMIT_MEMORY, "overcommit_memory" },
+ { CTL_INT, VM_PAGE_CLUSTER, "page-cluster" },
+ { CTL_INT, VM_DIRTY_BACKGROUND, "dirty_background_ratio" },
+ { CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" },
+ /* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */
+ /* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */
+ { CTL_INT, VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" },
+ { CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" },
+ /* VM_PAGEBUF unused */
+ /* VM_HUGETLB_PAGES "nr_hugepages" no longer used */
+ { CTL_INT, VM_SWAPPINESS, "swappiness" },
+ { CTL_INT, VM_LOWMEM_RESERVE_RATIO, "lowmem_reserve_ratio" },
+ { CTL_INT, VM_MIN_FREE_KBYTES, "min_free_kbytes" },
+ { CTL_INT, VM_MAX_MAP_COUNT, "max_map_count" },
+ { CTL_INT, VM_LAPTOP_MODE, "laptop_mode" },
+ { CTL_INT, VM_BLOCK_DUMP, "block_dump" },
+ { CTL_INT, VM_HUGETLB_GROUP, "hugetlb_shm_group" },
+ { CTL_INT, VM_VFS_CACHE_PRESSURE, "vfs_cache_pressure" },
+ { CTL_INT, VM_LEGACY_VA_LAYOUT, "legacy_va_layout" },
+ /* VM_SWAP_TOKEN_TIMEOUT unused */
+ { CTL_INT, VM_DROP_PAGECACHE, "drop_caches" },
+ { CTL_INT, VM_PERCPU_PAGELIST_FRACTION, "percpu_pagelist_fraction" },
+ { CTL_INT, VM_ZONE_RECLAIM_MODE, "zone_reclaim_mode" },
+ { CTL_INT, VM_MIN_UNMAPPED, "min_unmapped_ratio" },
+ { CTL_INT, VM_PANIC_ON_OOM, "panic_on_oom" },
+ { CTL_INT, VM_VDSO_ENABLED, "vdso_enabled" },
+ { CTL_INT, VM_MIN_SLAB, "min_slab_ratio" },
+
+ {} /* empty last entry */
+};
+
+static const struct bin_table bin_net_core_table[] = { /* rows: { handler alias, numeric name, string name }; names in comments have no row */
+ { CTL_INT, NET_CORE_WMEM_MAX, "wmem_max" },
+ { CTL_INT, NET_CORE_RMEM_MAX, "rmem_max" },
+ { CTL_INT, NET_CORE_WMEM_DEFAULT, "wmem_default" },
+ { CTL_INT, NET_CORE_RMEM_DEFAULT, "rmem_default" },
+ /* NET_CORE_DESTROY_DELAY unused */
+ { CTL_INT, NET_CORE_MAX_BACKLOG, "netdev_max_backlog" },
+ /* NET_CORE_FASTROUTE unused */
+ { CTL_INT, NET_CORE_MSG_COST, "message_cost" },
+ { CTL_INT, NET_CORE_MSG_BURST, "message_burst" },
+ { CTL_INT, NET_CORE_OPTMEM_MAX, "optmem_max" },
+ /* NET_CORE_HOT_LIST_LENGTH unused */
+ /* NET_CORE_DIVERT_VERSION unused */
+ /* NET_CORE_NO_CONG_THRESH unused */
+ /* NET_CORE_NO_CONG unused */
+ /* NET_CORE_LO_CONG unused */
+ /* NET_CORE_MOD_CONG unused */
+ { CTL_INT, NET_CORE_DEV_WEIGHT, "dev_weight" },
+ { CTL_INT, NET_CORE_SOMAXCONN, "somaxconn" },
+ { CTL_INT, NET_CORE_BUDGET, "netdev_budget" },
+ { CTL_INT, NET_CORE_AEVENT_ETIME, "xfrm_aevent_etime" },
+ { CTL_INT, NET_CORE_AEVENT_RSEQTH, "xfrm_aevent_rseqth" },
+ { CTL_INT, NET_CORE_WARNINGS, "warnings" },
+ {} /* empty last entry; no trailing comma, so nothing is appended after it by habit */
+};
+
+static const struct bin_table bin_net_unix_table[] = { /* rows: { handler alias, numeric name, string name } */
+ /* NET_UNIX_DESTROY_DELAY unused */
+ /* NET_UNIX_DELETE_DELAY unused */
+ { CTL_INT, NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen" }, /* the only row; the two names above have none */
+ {} /* empty last entry */
+};
+
+static const struct bin_table bin_net_ipv4_route_table[] = { /* rows: { handler alias, numeric name, string name }; names in comments have no row */
+ { CTL_INT, NET_IPV4_ROUTE_FLUSH, "flush" },
+ /* NET_IPV4_ROUTE_MIN_DELAY "min_delay" no longer used */
+ /* NET_IPV4_ROUTE_MAX_DELAY "max_delay" no longer used */
+ { CTL_INT, NET_IPV4_ROUTE_GC_THRESH, "gc_thresh" },
+ { CTL_INT, NET_IPV4_ROUTE_MAX_SIZE, "max_size" },
+ { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
+ { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
+ { CTL_INT, NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" },
+ { CTL_INT, NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval" },
+ { CTL_INT, NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" },
+ { CTL_INT, NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" },
+ { CTL_INT, NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" },
+ { CTL_INT, NET_IPV4_ROUTE_ERROR_COST, "error_cost" },
+ { CTL_INT, NET_IPV4_ROUTE_ERROR_BURST, "error_burst" },
+ { CTL_INT, NET_IPV4_ROUTE_GC_ELASTICITY, "gc_elasticity" },
+ { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" },
+ { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" },
+ { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" },
+ { CTL_INT, NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" },
+ {} /* empty last entry */
+};
+
+static const struct bin_table bin_net_ipv4_conf_vars_table[] = { /* shared child table of every row in bin_net_ipv4_conf_table */
+ { CTL_INT, NET_IPV4_CONF_FORWARDING, "forwarding" },
+ { CTL_INT, NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding" },
+
+ { CTL_INT, NET_IPV4_CONF_ACCEPT_REDIRECTS, "accept_redirects" },
+ { CTL_INT, NET_IPV4_CONF_SECURE_REDIRECTS, "secure_redirects" },
+ { CTL_INT, NET_IPV4_CONF_SEND_REDIRECTS, "send_redirects" },
+ { CTL_INT, NET_IPV4_CONF_SHARED_MEDIA, "shared_media" },
+ { CTL_INT, NET_IPV4_CONF_RP_FILTER, "rp_filter" },
+ { CTL_INT, NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, "accept_source_route" },
+ { CTL_INT, NET_IPV4_CONF_PROXY_ARP, "proxy_arp" },
+ { CTL_INT, NET_IPV4_CONF_MEDIUM_ID, "medium_id" },
+ { CTL_INT, NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay" },
+ { CTL_INT, NET_IPV4_CONF_LOG_MARTIANS, "log_martians" },
+ { CTL_INT, NET_IPV4_CONF_TAG, "tag" },
+ { CTL_INT, NET_IPV4_CONF_ARPFILTER, "arp_filter" },
+ { CTL_INT, NET_IPV4_CONF_ARP_ANNOUNCE, "arp_announce" },
+ { CTL_INT, NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" },
+ { CTL_INT, NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" },
+ { CTL_INT, NET_IPV4_CONF_ARP_NOTIFY, "arp_notify" },
+
+ { CTL_INT, NET_IPV4_CONF_NOXFRM, "disable_xfrm" },
+ { CTL_INT, NET_IPV4_CONF_NOPOLICY, "disable_policy" },
+ { CTL_INT, NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version" },
+ { CTL_INT, NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" },
+ {} /* empty last entry */
+};
+
+static const struct bin_table bin_net_ipv4_conf_table[] = { /* child table of "conf" in bin_net_ipv4_table */
+ { CTL_DIR, NET_PROTO_CONF_ALL, "all", bin_net_ipv4_conf_vars_table },
+ { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_ipv4_conf_vars_table },
+ { CTL_DIR, 0, NULL, bin_net_ipv4_conf_vars_table }, /* number 0 and no name; presumably a per-device wildcard resolved by the lookup code (not visible here) */
+ {}
+};
+
+static const struct bin_table bin_net_neigh_vars_table[] = { /* shared by the "default" and unnamed entries of bin_net_neigh_table */
+ { CTL_INT, NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" },
+ { CTL_INT, NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" },
+ { CTL_INT, NET_NEIGH_APP_SOLICIT, "app_solicit" },
+ /* NET_NEIGH_RETRANS_TIME "retrans_time" no longer used */
+ { CTL_INT, NET_NEIGH_REACHABLE_TIME, "base_reachable_time" },
+ { CTL_INT, NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time" },
+ { CTL_INT, NET_NEIGH_GC_STALE_TIME, "gc_stale_time" },
+ { CTL_INT, NET_NEIGH_UNRES_QLEN, "unres_qlen" },
+ { CTL_INT, NET_NEIGH_PROXY_QLEN, "proxy_qlen" },
+ /* NET_NEIGH_ANYCAST_DELAY "anycast_delay" no longer used */
+ /* NET_NEIGH_PROXY_DELAY "proxy_delay" no longer used */
+ /* NET_NEIGH_LOCKTIME "locktime" no longer used */
+ { CTL_INT, NET_NEIGH_GC_INTERVAL, "gc_interval" },
+ { CTL_INT, NET_NEIGH_GC_THRESH1, "gc_thresh1" },
+ { CTL_INT, NET_NEIGH_GC_THRESH2, "gc_thresh2" },
+ { CTL_INT, NET_NEIGH_GC_THRESH3, "gc_thresh3" },
+ { CTL_INT, NET_NEIGH_RETRANS_TIME_MS, "retrans_time_ms" },
+ { CTL_INT, NET_NEIGH_REACHABLE_TIME_MS, "base_reachable_time_ms" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_neigh_table[] = { /* child table of "neigh" in both bin_net_ipv4_table and bin_net_ipv6_table */
+ { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_neigh_vars_table },
+ { CTL_DIR, 0, NULL, bin_net_neigh_vars_table }, /* number 0 and no name; presumably a per-device wildcard (lookup code not visible here) */
+ {}
+};
+
+static const struct bin_table bin_net_ipv4_netfilter_table[] = { /* child table of "netfilter" in bin_net_ipv4_table */
+ { CTL_INT, NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max" },
+
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT "ip_conntrack_tcp_timeout_syn_sent" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV "ip_conntrack_tcp_timeout_syn_recv" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED "ip_conntrack_tcp_timeout_established" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT "ip_conntrack_tcp_timeout_fin_wait" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT "ip_conntrack_tcp_timeout_close_wait" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK "ip_conntrack_tcp_timeout_last_ack" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT "ip_conntrack_tcp_timeout_time_wait" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE "ip_conntrack_tcp_timeout_close" no longer used */
+
+ /* NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT "ip_conntrack_udp_timeout" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM "ip_conntrack_udp_timeout_stream" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT "ip_conntrack_icmp_timeout" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT "ip_conntrack_generic_timeout" no longer used */
+
+ { CTL_INT, NET_IPV4_NF_CONNTRACK_BUCKETS, "ip_conntrack_buckets" },
+ { CTL_INT, NET_IPV4_NF_CONNTRACK_LOG_INVALID, "ip_conntrack_log_invalid" },
+ /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS "ip_conntrack_tcp_timeout_max_retrans" no longer used */
+ { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_LOOSE, "ip_conntrack_tcp_loose" },
+ { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, "ip_conntrack_tcp_be_liberal" },
+ { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, "ip_conntrack_tcp_max_retrans" },
+
+ /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED "ip_conntrack_sctp_timeout_closed" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT "ip_conntrack_sctp_timeout_cookie_wait" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED "ip_conntrack_sctp_timeout_cookie_echoed" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED "ip_conntrack_sctp_timeout_established" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT "ip_conntrack_sctp_timeout_shutdown_sent" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD "ip_conntrack_sctp_timeout_shutdown_recd" no longer used */
+ /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT "ip_conntrack_sctp_timeout_shutdown_ack_sent" no longer used */
+
+ { CTL_INT, NET_IPV4_NF_CONNTRACK_COUNT, "ip_conntrack_count" },
+ { CTL_INT, NET_IPV4_NF_CONNTRACK_CHECKSUM, "ip_conntrack_checksum" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_ipv4_table[] = { /* child table of "ipv4" in bin_net_table */
+ {CTL_INT, NET_IPV4_FORWARD, "ip_forward" },
+
+ { CTL_DIR, NET_IPV4_CONF, "conf", bin_net_ipv4_conf_table },
+ { CTL_DIR, NET_IPV4_NEIGH, "neigh", bin_net_neigh_table },
+ { CTL_DIR, NET_IPV4_ROUTE, "route", bin_net_ipv4_route_table },
+ /* NET_IPV4_FIB_HASH unused */
+ { CTL_DIR, NET_IPV4_NETFILTER, "netfilter", bin_net_ipv4_netfilter_table },
+
+ { CTL_INT, NET_IPV4_TCP_TIMESTAMPS, "tcp_timestamps" },
+ { CTL_INT, NET_IPV4_TCP_WINDOW_SCALING, "tcp_window_scaling" },
+ { CTL_INT, NET_IPV4_TCP_SACK, "tcp_sack" },
+ { CTL_INT, NET_IPV4_TCP_RETRANS_COLLAPSE, "tcp_retrans_collapse" },
+ { CTL_INT, NET_IPV4_DEFAULT_TTL, "ip_default_ttl" },
+ /* NET_IPV4_AUTOCONFIG unused */
+ { CTL_INT, NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc" },
+ { CTL_INT, NET_IPV4_NONLOCAL_BIND, "ip_nonlocal_bind" },
+ { CTL_INT, NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries" },
+ { CTL_INT, NET_TCP_SYNACK_RETRIES, "tcp_synack_retries" },
+ { CTL_INT, NET_TCP_MAX_ORPHANS, "tcp_max_orphans" },
+ { CTL_INT, NET_TCP_MAX_TW_BUCKETS, "tcp_max_tw_buckets" },
+ { CTL_INT, NET_IPV4_DYNADDR, "ip_dynaddr" },
+ { CTL_INT, NET_IPV4_TCP_KEEPALIVE_TIME, "tcp_keepalive_time" },
+ { CTL_INT, NET_IPV4_TCP_KEEPALIVE_PROBES, "tcp_keepalive_probes" },
+ { CTL_INT, NET_IPV4_TCP_KEEPALIVE_INTVL, "tcp_keepalive_intvl" },
+ { CTL_INT, NET_IPV4_TCP_RETRIES1, "tcp_retries1" },
+ { CTL_INT, NET_IPV4_TCP_RETRIES2, "tcp_retries2" },
+ { CTL_INT, NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout" },
+ { CTL_INT, NET_TCP_SYNCOOKIES, "tcp_syncookies" },
+ { CTL_INT, NET_TCP_TW_RECYCLE, "tcp_tw_recycle" },
+ { CTL_INT, NET_TCP_ABORT_ON_OVERFLOW, "tcp_abort_on_overflow" },
+ { CTL_INT, NET_TCP_STDURG, "tcp_stdurg" },
+ { CTL_INT, NET_TCP_RFC1337, "tcp_rfc1337" },
+ { CTL_INT, NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog" },
+ { CTL_INT, NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range" },
+ { CTL_INT, NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships" },
+ { CTL_INT, NET_IPV4_IGMP_MAX_MSF, "igmp_max_msf" },
+ { CTL_INT, NET_IPV4_INET_PEER_THRESHOLD, "inet_peer_threshold" },
+ { CTL_INT, NET_IPV4_INET_PEER_MINTTL, "inet_peer_minttl" },
+ { CTL_INT, NET_IPV4_INET_PEER_MAXTTL, "inet_peer_maxttl" },
+ { CTL_INT, NET_IPV4_INET_PEER_GC_MINTIME, "inet_peer_gc_mintime" },
+ { CTL_INT, NET_IPV4_INET_PEER_GC_MAXTIME, "inet_peer_gc_maxtime" },
+ { CTL_INT, NET_TCP_ORPHAN_RETRIES, "tcp_orphan_retries" },
+ { CTL_INT, NET_TCP_FACK, "tcp_fack" },
+ { CTL_INT, NET_TCP_REORDERING, "tcp_reordering" },
+ { CTL_INT, NET_TCP_ECN, "tcp_ecn" },
+ { CTL_INT, NET_TCP_DSACK, "tcp_dsack" },
+ { CTL_INT, NET_TCP_MEM, "tcp_mem" },
+ { CTL_INT, NET_TCP_WMEM, "tcp_wmem" },
+ { CTL_INT, NET_TCP_RMEM, "tcp_rmem" },
+ { CTL_INT, NET_TCP_APP_WIN, "tcp_app_win" },
+ { CTL_INT, NET_TCP_ADV_WIN_SCALE, "tcp_adv_win_scale" },
+ { CTL_INT, NET_TCP_TW_REUSE, "tcp_tw_reuse" },
+ { CTL_INT, NET_TCP_FRTO, "tcp_frto" },
+ { CTL_INT, NET_TCP_FRTO_RESPONSE, "tcp_frto_response" },
+ { CTL_INT, NET_TCP_LOW_LATENCY, "tcp_low_latency" },
+ { CTL_INT, NET_TCP_NO_METRICS_SAVE, "tcp_no_metrics_save" },
+ { CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" },
+ { CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" },
+ { CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" },
+ { CTL_INT, NET_TCP_ABC, "tcp_abc" },
+ { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" },
+ { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" },
+ { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" },
+ { CTL_INT, NET_TCP_DMA_COPYBREAK, "tcp_dma_copybreak" },
+ { CTL_INT, NET_TCP_SLOW_START_AFTER_IDLE, "tcp_slow_start_after_idle" },
+ { CTL_INT, NET_CIPSOV4_CACHE_ENABLE, "cipso_cache_enable" },
+ { CTL_INT, NET_CIPSOV4_CACHE_BUCKET_SIZE, "cipso_cache_bucket_size" },
+ { CTL_INT, NET_CIPSOV4_RBM_OPTFMT, "cipso_rbm_optfmt" },
+ { CTL_INT, NET_CIPSOV4_RBM_STRICTVALID, "cipso_rbm_strictvalid" },
+ /* NET_TCP_AVAIL_CONG_CONTROL "tcp_available_congestion_control" no longer used */
+ { CTL_STR, NET_TCP_ALLOWED_CONG_CONTROL, "tcp_allowed_congestion_control" },
+ { CTL_INT, NET_TCP_MAX_SSTHRESH, "tcp_max_ssthresh" },
+
+ { CTL_INT, NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all" },
+ { CTL_INT, NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts" },
+ { CTL_INT, NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, "icmp_ignore_bogus_error_responses" },
+ { CTL_INT, NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, "icmp_errors_use_inbound_ifaddr" },
+ { CTL_INT, NET_IPV4_ICMP_RATELIMIT, "icmp_ratelimit" },
+ { CTL_INT, NET_IPV4_ICMP_RATEMASK, "icmp_ratemask" },
+
+ { CTL_INT, NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh" },
+ { CTL_INT, NET_IPV4_IPFRAG_LOW_THRESH, "ipfrag_low_thresh" },
+ { CTL_INT, NET_IPV4_IPFRAG_TIME, "ipfrag_time" },
+
+ { CTL_INT, NET_IPV4_IPFRAG_SECRET_INTERVAL, "ipfrag_secret_interval" },
+ /* NET_IPV4_IPFRAG_MAX_DIST "ipfrag_max_dist" no longer used */
+
+ { CTL_INT, 2088 /* NET_IPQ_QMAX */, "ip_queue_maxlen" },
+
+ /* NET_TCP_DEFAULT_WIN_SCALE unused */
+ /* NET_TCP_BIC_BETA unused */
+ /* NET_IPV4_TCP_MAX_KA_PROBES unused */
+ /* NET_IPV4_IP_MASQ_DEBUG unused */
+ /* NET_TCP_SYN_TAILDROP unused */
+ /* NET_IPV4_ICMP_SOURCEQUENCH_RATE unused */
+ /* NET_IPV4_ICMP_DESTUNREACH_RATE unused */
+ /* NET_IPV4_ICMP_TIMEEXCEED_RATE unused */
+ /* NET_IPV4_ICMP_PARAMPROB_RATE unused */
+ /* NET_IPV4_ICMP_ECHOREPLY_RATE unused */
+ /* NET_IPV4_ALWAYS_DEFRAG unused */
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_ipx_table[] = { /* child table of "ipx" in bin_net_table */
+ { CTL_INT, NET_IPX_PPROP_BROADCASTING, "ipx_pprop_broadcasting" },
+ /* NET_IPX_FORWARDING unused */
+ {}
+};
+
+static const struct bin_table bin_net_atalk_table[] = { /* child table of "appletalk" in bin_net_table */
+ { CTL_INT, NET_ATALK_AARP_EXPIRY_TIME, "aarp-expiry-time" },
+ { CTL_INT, NET_ATALK_AARP_TICK_TIME, "aarp-tick-time" },
+ { CTL_INT, NET_ATALK_AARP_RETRANSMIT_LIMIT, "aarp-retransmit-limit" },
+ { CTL_INT, NET_ATALK_AARP_RESOLVE_TIME, "aarp-resolve-time" },
+ {}, /* empty entry ends the table (the only terminator here written with a trailing comma) */
+};
+
+static const struct bin_table bin_net_netrom_table[] = { /* child table of "netrom" in bin_net_table */
+ { CTL_INT, NET_NETROM_DEFAULT_PATH_QUALITY, "default_path_quality" },
+ { CTL_INT, NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, "obsolescence_count_initialiser" },
+ { CTL_INT, NET_NETROM_NETWORK_TTL_INITIALISER, "network_ttl_initialiser" },
+ { CTL_INT, NET_NETROM_TRANSPORT_TIMEOUT, "transport_timeout" },
+ { CTL_INT, NET_NETROM_TRANSPORT_MAXIMUM_TRIES, "transport_maximum_tries" },
+ { CTL_INT, NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY, "transport_acknowledge_delay" },
+ { CTL_INT, NET_NETROM_TRANSPORT_BUSY_DELAY, "transport_busy_delay" },
+ { CTL_INT, NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE, "transport_requested_window_size" },
+ { CTL_INT, NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT, "transport_no_activity_timeout" },
+ { CTL_INT, NET_NETROM_ROUTING_CONTROL, "routing_control" },
+ { CTL_INT, NET_NETROM_LINK_FAILS_COUNT, "link_fails_count" },
+ { CTL_INT, NET_NETROM_RESET, "reset" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_ax25_param_table[] = { /* child table of the unnamed entry in bin_net_ax25_table */
+ { CTL_INT, NET_AX25_IP_DEFAULT_MODE, "ip_default_mode" },
+ { CTL_INT, NET_AX25_DEFAULT_MODE, "ax25_default_mode" },
+ { CTL_INT, NET_AX25_BACKOFF_TYPE, "backoff_type" },
+ { CTL_INT, NET_AX25_CONNECT_MODE, "connect_mode" },
+ { CTL_INT, NET_AX25_STANDARD_WINDOW, "standard_window_size" },
+ { CTL_INT, NET_AX25_EXTENDED_WINDOW, "extended_window_size" },
+ { CTL_INT, NET_AX25_T1_TIMEOUT, "t1_timeout" },
+ { CTL_INT, NET_AX25_T2_TIMEOUT, "t2_timeout" },
+ { CTL_INT, NET_AX25_T3_TIMEOUT, "t3_timeout" },
+ { CTL_INT, NET_AX25_IDLE_TIMEOUT, "idle_timeout" },
+ { CTL_INT, NET_AX25_N2, "maximum_retry_count" },
+ { CTL_INT, NET_AX25_PACLEN, "maximum_packet_length" },
+ { CTL_INT, NET_AX25_PROTOCOL, "protocol" },
+ { CTL_INT, NET_AX25_DAMA_SLAVE_TIMEOUT, "dama_slave_timeout" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_ax25_table[] = { /* child table of "ax25" in bin_net_table */
+ { CTL_DIR, 0, NULL, bin_net_ax25_param_table }, /* number 0 and no name; presumably a per-device wildcard (lookup code not visible here) */
+ {}
+};
+
+static const struct bin_table bin_net_rose_table[] = { /* child table of "rose" in bin_net_table */
+ { CTL_INT, NET_ROSE_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" },
+ { CTL_INT, NET_ROSE_CALL_REQUEST_TIMEOUT, "call_request_timeout" },
+ { CTL_INT, NET_ROSE_RESET_REQUEST_TIMEOUT, "reset_request_timeout" },
+ { CTL_INT, NET_ROSE_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" },
+ { CTL_INT, NET_ROSE_ACK_HOLD_BACK_TIMEOUT, "acknowledge_hold_back_timeout" },
+ { CTL_INT, NET_ROSE_ROUTING_CONTROL, "routing_control" },
+ { CTL_INT, NET_ROSE_LINK_FAIL_TIMEOUT, "link_fail_timeout" },
+ { CTL_INT, NET_ROSE_MAX_VCS, "maximum_virtual_circuits" },
+ { CTL_INT, NET_ROSE_WINDOW_SIZE, "window_size" },
+ { CTL_INT, NET_ROSE_NO_ACTIVITY_TIMEOUT, "no_activity_timeout" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_ipv6_conf_var_table[] = { /* shared by the "all", "default" and unnamed entries of bin_net_ipv6_conf_table */
+ { CTL_INT, NET_IPV6_FORWARDING, "forwarding" },
+ { CTL_INT, NET_IPV6_HOP_LIMIT, "hop_limit" },
+ { CTL_INT, NET_IPV6_MTU, "mtu" },
+ { CTL_INT, NET_IPV6_ACCEPT_RA, "accept_ra" },
+ { CTL_INT, NET_IPV6_ACCEPT_REDIRECTS, "accept_redirects" },
+ { CTL_INT, NET_IPV6_AUTOCONF, "autoconf" },
+ { CTL_INT, NET_IPV6_DAD_TRANSMITS, "dad_transmits" },
+ { CTL_INT, NET_IPV6_RTR_SOLICITS, "router_solicitations" },
+ { CTL_INT, NET_IPV6_RTR_SOLICIT_INTERVAL, "router_solicitation_interval" },
+ { CTL_INT, NET_IPV6_RTR_SOLICIT_DELAY, "router_solicitation_delay" },
+ { CTL_INT, NET_IPV6_USE_TEMPADDR, "use_tempaddr" },
+ { CTL_INT, NET_IPV6_TEMP_VALID_LFT, "temp_valid_lft" },
+ { CTL_INT, NET_IPV6_TEMP_PREFERED_LFT, "temp_prefered_lft" },
+ { CTL_INT, NET_IPV6_REGEN_MAX_RETRY, "regen_max_retry" },
+ { CTL_INT, NET_IPV6_MAX_DESYNC_FACTOR, "max_desync_factor" },
+ { CTL_INT, NET_IPV6_MAX_ADDRESSES, "max_addresses" },
+ { CTL_INT, NET_IPV6_FORCE_MLD_VERSION, "force_mld_version" },
+ { CTL_INT, NET_IPV6_ACCEPT_RA_DEFRTR, "accept_ra_defrtr" },
+ { CTL_INT, NET_IPV6_ACCEPT_RA_PINFO, "accept_ra_pinfo" },
+ { CTL_INT, NET_IPV6_ACCEPT_RA_RTR_PREF, "accept_ra_rtr_pref" },
+ { CTL_INT, NET_IPV6_RTR_PROBE_INTERVAL, "router_probe_interval" },
+ { CTL_INT, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" },
+ { CTL_INT, NET_IPV6_PROXY_NDP, "proxy_ndp" },
+ { CTL_INT, NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_ipv6_conf_table[] = { /* child table of "conf" in bin_net_ipv6_table */
+ { CTL_DIR, NET_PROTO_CONF_ALL, "all", bin_net_ipv6_conf_var_table },
+ { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_ipv6_conf_var_table },
+ { CTL_DIR, 0, NULL, bin_net_ipv6_conf_var_table }, /* number 0 and no name; presumably a per-device wildcard (lookup code not visible here) */
+ {}
+};
+
+static const struct bin_table bin_net_ipv6_route_table[] = { /* child table of "route" in bin_net_ipv6_table */
+ /* NET_IPV6_ROUTE_FLUSH "flush" no longer used */
+ { CTL_INT, NET_IPV6_ROUTE_GC_THRESH, "gc_thresh" },
+ { CTL_INT, NET_IPV6_ROUTE_MAX_SIZE, "max_size" },
+ { CTL_INT, NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
+ { CTL_INT, NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout" },
+ { CTL_INT, NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval" },
+ { CTL_INT, NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity" },
+ { CTL_INT, NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires" },
+ { CTL_INT, NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss" },
+ { CTL_INT, NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_ipv6_icmp_table[] = { /* child table of "icmp" in bin_net_ipv6_table */
+ { CTL_INT, NET_IPV6_ICMP_RATELIMIT, "ratelimit" },
+ {}
+};
+
+static const struct bin_table bin_net_ipv6_table[] = { /* child table of "ipv6" in bin_net_table */
+ { CTL_DIR, NET_IPV6_CONF, "conf", bin_net_ipv6_conf_table },
+ { CTL_DIR, NET_IPV6_NEIGH, "neigh", bin_net_neigh_table }, /* same neighbour table that bin_net_ipv4_table uses */
+ { CTL_DIR, NET_IPV6_ROUTE, "route", bin_net_ipv6_route_table },
+ { CTL_DIR, NET_IPV6_ICMP, "icmp", bin_net_ipv6_icmp_table },
+ { CTL_INT, NET_IPV6_BINDV6ONLY, "bindv6only" },
+ { CTL_INT, NET_IPV6_IP6FRAG_HIGH_THRESH, "ip6frag_high_thresh" },
+ { CTL_INT, NET_IPV6_IP6FRAG_LOW_THRESH, "ip6frag_low_thresh" },
+ { CTL_INT, NET_IPV6_IP6FRAG_TIME, "ip6frag_time" },
+ { CTL_INT, NET_IPV6_IP6FRAG_SECRET_INTERVAL, "ip6frag_secret_interval" },
+ { CTL_INT, NET_IPV6_MLD_MAX_MSF, "mld_max_msf" },
+ { CTL_INT, 2088 /* IPQ_QMAX */, "ip6_queue_maxlen" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_x25_table[] = { /* child table of "x25" in bin_net_table */
+ { CTL_INT, NET_X25_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" },
+ { CTL_INT, NET_X25_CALL_REQUEST_TIMEOUT, "call_request_timeout" },
+ { CTL_INT, NET_X25_RESET_REQUEST_TIMEOUT, "reset_request_timeout" },
+ { CTL_INT, NET_X25_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" },
+ { CTL_INT, NET_X25_ACK_HOLD_BACK_TIMEOUT, "acknowledgement_hold_back_timeout" },
+ { CTL_INT, NET_X25_FORWARD, "x25_forward" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_tr_table[] = { /* child table of "token-ring" in bin_net_table */
+ { CTL_INT, NET_TR_RIF_TIMEOUT, "rif_timeout" },
+ {}
+};
+
+
+static const struct bin_table bin_net_decnet_conf_vars[] = { /* shared by every entry of bin_net_decnet_conf */
+ { CTL_INT, NET_DECNET_CONF_DEV_FORWARDING, "forwarding" },
+ { CTL_INT, NET_DECNET_CONF_DEV_PRIORITY, "priority" },
+ { CTL_INT, NET_DECNET_CONF_DEV_T2, "t2" },
+ { CTL_INT, NET_DECNET_CONF_DEV_T3, "t3" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_decnet_conf[] = { /* child table of "conf" in bin_net_decnet_table */
+ { CTL_DIR, NET_DECNET_CONF_ETHER, "ethernet", bin_net_decnet_conf_vars },
+ { CTL_DIR, NET_DECNET_CONF_GRE, "ipgre", bin_net_decnet_conf_vars },
+ { CTL_DIR, NET_DECNET_CONF_X25, "x25", bin_net_decnet_conf_vars },
+ { CTL_DIR, NET_DECNET_CONF_PPP, "ppp", bin_net_decnet_conf_vars },
+ { CTL_DIR, NET_DECNET_CONF_DDCMP, "ddcmp", bin_net_decnet_conf_vars },
+ { CTL_DIR, NET_DECNET_CONF_LOOPBACK, "loopback", bin_net_decnet_conf_vars },
+ { CTL_DIR, 0, NULL, bin_net_decnet_conf_vars }, /* number 0 and no name; presumably a per-device wildcard (lookup code not visible here) */
+ {}
+};
+
+static const struct bin_table bin_net_decnet_table[] = { /* child table of "decnet" in bin_net_table */
+ { CTL_DIR, NET_DECNET_CONF, "conf", bin_net_decnet_conf },
+ { CTL_DNADR, NET_DECNET_NODE_ADDRESS, "node_address" }, /* only CTL_DNADR entry in these tables */
+ { CTL_STR, NET_DECNET_NODE_NAME, "node_name" },
+ { CTL_STR, NET_DECNET_DEFAULT_DEVICE, "default_device" },
+ { CTL_INT, NET_DECNET_TIME_WAIT, "time_wait" },
+ { CTL_INT, NET_DECNET_DN_COUNT, "dn_count" },
+ { CTL_INT, NET_DECNET_DI_COUNT, "di_count" },
+ { CTL_INT, NET_DECNET_DR_COUNT, "dr_count" },
+ { CTL_INT, NET_DECNET_DST_GC_INTERVAL, "dst_gc_interval" },
+ { CTL_INT, NET_DECNET_NO_FC_MAX_CWND, "no_fc_max_cwnd" },
+ { CTL_INT, NET_DECNET_MEM, "decnet_mem" },
+ { CTL_INT, NET_DECNET_RMEM, "decnet_rmem" },
+ { CTL_INT, NET_DECNET_WMEM, "decnet_wmem" },
+ { CTL_INT, NET_DECNET_DEBUG_LEVEL, "debug" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_sctp_table[] = { /* child table of "sctp" in bin_net_table */
+ { CTL_INT, NET_SCTP_RTO_INITIAL, "rto_initial" },
+ { CTL_INT, NET_SCTP_RTO_MIN, "rto_min" },
+ { CTL_INT, NET_SCTP_RTO_MAX, "rto_max" },
+ { CTL_INT, NET_SCTP_RTO_ALPHA, "rto_alpha_exp_divisor" },
+ { CTL_INT, NET_SCTP_RTO_BETA, "rto_beta_exp_divisor" },
+ { CTL_INT, NET_SCTP_VALID_COOKIE_LIFE, "valid_cookie_life" },
+ { CTL_INT, NET_SCTP_ASSOCIATION_MAX_RETRANS, "association_max_retrans" },
+ { CTL_INT, NET_SCTP_PATH_MAX_RETRANS, "path_max_retrans" },
+ { CTL_INT, NET_SCTP_MAX_INIT_RETRANSMITS, "max_init_retransmits" },
+ { CTL_INT, NET_SCTP_HB_INTERVAL, "hb_interval" },
+ { CTL_INT, NET_SCTP_PRESERVE_ENABLE, "cookie_preserve_enable" },
+ { CTL_INT, NET_SCTP_MAX_BURST, "max_burst" },
+ { CTL_INT, NET_SCTP_ADDIP_ENABLE, "addip_enable" },
+ { CTL_INT, NET_SCTP_PRSCTP_ENABLE, "prsctp_enable" },
+ { CTL_INT, NET_SCTP_SNDBUF_POLICY, "sndbuf_policy" },
+ { CTL_INT, NET_SCTP_SACK_TIMEOUT, "sack_timeout" },
+ { CTL_INT, NET_SCTP_RCVBUF_POLICY, "rcvbuf_policy" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_llc_llc2_timeout_table[] = { /* child table of "timeout" in bin_net_llc_llc2_table */
+ { CTL_INT, NET_LLC2_ACK_TIMEOUT, "ack" },
+ { CTL_INT, NET_LLC2_P_TIMEOUT, "p" },
+ { CTL_INT, NET_LLC2_REJ_TIMEOUT, "rej" },
+ { CTL_INT, NET_LLC2_BUSY_TIMEOUT, "busy" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_llc_station_table[] = { /* child table of "station" in bin_net_llc_table */
+ { CTL_INT, NET_LLC_STATION_ACK_TIMEOUT, "ack_timeout" },
+ {}
+};
+
+static const struct bin_table bin_net_llc_llc2_table[] = { /* child table of "llc2" in bin_net_llc_table */
+ { CTL_DIR, NET_LLC2, "timeout", bin_net_llc_llc2_timeout_table }, /* NOTE(review): reuses NET_LLC2, the same constant the parent uses for "llc2" - confirm this is the intended number */
+ {}
+};
+
+static const struct bin_table bin_net_llc_table[] = { /* child table of "llc" in bin_net_table */
+ { CTL_DIR, NET_LLC2, "llc2", bin_net_llc_llc2_table },
+ { CTL_DIR, NET_LLC_STATION, "station", bin_net_llc_station_table },
+ {}
+};
+
+static const struct bin_table bin_net_netfilter_table[] = { /* child table of "netfilter" in bin_net_table */
+ { CTL_INT, NET_NF_CONNTRACK_MAX, "nf_conntrack_max" },
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT "nf_conntrack_tcp_timeout_syn_sent" no longer used */
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV "nf_conntrack_tcp_timeout_syn_recv" no longer used */
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED "nf_conntrack_tcp_timeout_established" no longer used */
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT "nf_conntrack_tcp_timeout_fin_wait" no longer used */
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT "nf_conntrack_tcp_timeout_close_wait" no longer used */
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK "nf_conntrack_tcp_timeout_last_ack" no longer used */
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT "nf_conntrack_tcp_timeout_time_wait" no longer used */
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE "nf_conntrack_tcp_timeout_close" no longer used */
+ /* NET_NF_CONNTRACK_UDP_TIMEOUT "nf_conntrack_udp_timeout" no longer used */
+ /* NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM "nf_conntrack_udp_timeout_stream" no longer used */
+ /* NET_NF_CONNTRACK_ICMP_TIMEOUT "nf_conntrack_icmp_timeout" no longer used */
+ /* NET_NF_CONNTRACK_GENERIC_TIMEOUT "nf_conntrack_generic_timeout" no longer used */
+ { CTL_INT, NET_NF_CONNTRACK_BUCKETS, "nf_conntrack_buckets" },
+ { CTL_INT, NET_NF_CONNTRACK_LOG_INVALID, "nf_conntrack_log_invalid" },
+ /* NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS "nf_conntrack_tcp_timeout_max_retrans" no longer used */
+ { CTL_INT, NET_NF_CONNTRACK_TCP_LOOSE, "nf_conntrack_tcp_loose" },
+ { CTL_INT, NET_NF_CONNTRACK_TCP_BE_LIBERAL, "nf_conntrack_tcp_be_liberal" },
+ { CTL_INT, NET_NF_CONNTRACK_TCP_MAX_RETRANS, "nf_conntrack_tcp_max_retrans" },
+ /* NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED "nf_conntrack_sctp_timeout_closed" no longer used */
+ /* NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT "nf_conntrack_sctp_timeout_cookie_wait" no longer used */
+ /* NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED "nf_conntrack_sctp_timeout_cookie_echoed" no longer used */
+ /* NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED "nf_conntrack_sctp_timeout_established" no longer used */
+ /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT "nf_conntrack_sctp_timeout_shutdown_sent" no longer used */
+ /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD "nf_conntrack_sctp_timeout_shutdown_recd" no longer used */
+ /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT "nf_conntrack_sctp_timeout_shutdown_ack_sent" no longer used */
+ { CTL_INT, NET_NF_CONNTRACK_COUNT, "nf_conntrack_count" },
+ /* NET_NF_CONNTRACK_ICMPV6_TIMEOUT "nf_conntrack_icmpv6_timeout" no longer used */
+ /* NET_NF_CONNTRACK_FRAG6_TIMEOUT "nf_conntrack_frag6_timeout" no longer used */
+ { CTL_INT, NET_NF_CONNTRACK_FRAG6_LOW_THRESH, "nf_conntrack_frag6_low_thresh" },
+ { CTL_INT, NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, "nf_conntrack_frag6_high_thresh" },
+ { CTL_INT, NET_NF_CONNTRACK_CHECKSUM, "nf_conntrack_checksum" },
+
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_irda_table[] = { /* child table of "irda" in bin_net_table */
+ { CTL_INT, NET_IRDA_DISCOVERY, "discovery" },
+ { CTL_STR, NET_IRDA_DEVNAME, "devname" },
+ { CTL_INT, NET_IRDA_DEBUG, "debug" },
+ { CTL_INT, NET_IRDA_FAST_POLL, "fast_poll_increase" },
+ { CTL_INT, NET_IRDA_DISCOVERY_SLOTS, "discovery_slots" },
+ { CTL_INT, NET_IRDA_DISCOVERY_TIMEOUT, "discovery_timeout" },
+ { CTL_INT, NET_IRDA_SLOT_TIMEOUT, "slot_timeout" },
+ { CTL_INT, NET_IRDA_MAX_BAUD_RATE, "max_baud_rate" },
+ { CTL_INT, NET_IRDA_MIN_TX_TURN_TIME, "min_tx_turn_time" },
+ { CTL_INT, NET_IRDA_MAX_TX_DATA_SIZE, "max_tx_data_size" },
+ { CTL_INT, NET_IRDA_MAX_TX_WINDOW, "max_tx_window" },
+ { CTL_INT, NET_IRDA_MAX_NOREPLY_TIME, "max_noreply_time" },
+ { CTL_INT, NET_IRDA_WARN_NOREPLY_TIME, "warn_noreply_time" },
+ { CTL_INT, NET_IRDA_LAP_KEEPALIVE_TIME, "lap_keepalive_time" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_net_table[] = { /* child table of "net" in bin_root_table */
+ { CTL_DIR, NET_CORE, "core", bin_net_core_table },
+ /* NET_ETHER not used */
+ /* NET_802 not used */
+ { CTL_DIR, NET_UNIX, "unix", bin_net_unix_table },
+ { CTL_DIR, NET_IPV4, "ipv4", bin_net_ipv4_table },
+ { CTL_DIR, NET_IPX, "ipx", bin_net_ipx_table },
+ { CTL_DIR, NET_ATALK, "appletalk", bin_net_atalk_table },
+ { CTL_DIR, NET_NETROM, "netrom", bin_net_netrom_table },
+ { CTL_DIR, NET_AX25, "ax25", bin_net_ax25_table },
+ /* NET_BRIDGE "bridge" no longer used */
+ { CTL_DIR, NET_ROSE, "rose", bin_net_rose_table },
+ { CTL_DIR, NET_IPV6, "ipv6", bin_net_ipv6_table },
+ { CTL_DIR, NET_X25, "x25", bin_net_x25_table },
+ { CTL_DIR, NET_TR, "token-ring", bin_net_tr_table },
+ { CTL_DIR, NET_DECNET, "decnet", bin_net_decnet_table },
+ /* NET_ECONET not used */
+ { CTL_DIR, NET_SCTP, "sctp", bin_net_sctp_table },
+ { CTL_DIR, NET_LLC, "llc", bin_net_llc_table },
+ { CTL_DIR, NET_NETFILTER, "netfilter", bin_net_netfilter_table },
+ /* NET_DCCP "dccp" no longer used */
+ { CTL_DIR, NET_IRDA, "irda", bin_net_irda_table },
+ { CTL_INT, 2089, "nf_conntrack_max" }, /* NOTE(review): bare literal with no named constant; the same name also appears in bin_net_netfilter_table */
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_fs_quota_table[] = { /* child table of "quota" in bin_fs_table */
+ { CTL_INT, FS_DQ_LOOKUPS, "lookups" },
+ { CTL_INT, FS_DQ_DROPS, "drops" },
+ { CTL_INT, FS_DQ_READS, "reads" },
+ { CTL_INT, FS_DQ_WRITES, "writes" },
+ { CTL_INT, FS_DQ_CACHE_HITS, "cache_hits" },
+ { CTL_INT, FS_DQ_ALLOCATED, "allocated_dquots" },
+ { CTL_INT, FS_DQ_FREE, "free_dquots" },
+ { CTL_INT, FS_DQ_SYNCS, "syncs" },
+ { CTL_INT, FS_DQ_WARNINGS, "warnings" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_fs_xfs_table[] = { /* child table of "xfs" in bin_fs_table */
+ { CTL_INT, XFS_SGID_INHERIT, "irix_sgid_inherit" },
+ { CTL_INT, XFS_SYMLINK_MODE, "irix_symlink_mode" },
+ { CTL_INT, XFS_PANIC_MASK, "panic_mask" },
+
+ { CTL_INT, XFS_ERRLEVEL, "error_level" },
+ { CTL_INT, XFS_SYNCD_TIMER, "xfssyncd_centisecs" },
+ { CTL_INT, XFS_INHERIT_SYNC, "inherit_sync" },
+ { CTL_INT, XFS_INHERIT_NODUMP, "inherit_nodump" },
+ { CTL_INT, XFS_INHERIT_NOATIME, "inherit_noatime" },
+ { CTL_INT, XFS_BUF_TIMER, "xfsbufd_centisecs" },
+ { CTL_INT, XFS_BUF_AGE, "age_buffer_centisecs" },
+ { CTL_INT, XFS_INHERIT_NOSYM, "inherit_nosymlinks" },
+ { CTL_INT, XFS_ROTORSTEP, "rotorstep" },
+ { CTL_INT, XFS_INHERIT_NODFRG, "inherit_nodefrag" },
+ { CTL_INT, XFS_FILESTREAM_TIMER, "filestream_centisecs" },
+ { CTL_INT, XFS_STATS_CLEAR, "stats_clear" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_fs_ocfs2_nm_table[] = { /* child table of "nm" in bin_fs_ocfs2_table */
+ { CTL_STR, 1, "hb_ctl_path" }, /* number given as a bare literal */
+ {}
+};
+
+static const struct bin_table bin_fs_ocfs2_table[] = { /* child table of "ocfs2" in bin_fs_table */
+ { CTL_DIR, 1, "nm", bin_fs_ocfs2_nm_table }, /* number given as a bare literal */
+ {}
+};
+
+static const struct bin_table bin_inotify_table[] = { /* child table of "inotify" in bin_fs_table */
+ { CTL_INT, INOTIFY_MAX_USER_INSTANCES, "max_user_instances" },
+ { CTL_INT, INOTIFY_MAX_USER_WATCHES, "max_user_watches" },
+ { CTL_INT, INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" },
+ {}
+};
+
+static const struct bin_table bin_fs_table[] = { /* child table of "fs" in bin_root_table */
+ { CTL_INT, FS_NRINODE, "inode-nr" },
+ { CTL_INT, FS_STATINODE, "inode-state" },
+ /* FS_MAXINODE unused */
+ /* FS_NRDQUOT unused */
+ /* FS_MAXDQUOT unused */
+ /* FS_NRFILE "file-nr" no longer used */
+ { CTL_INT, FS_MAXFILE, "file-max" },
+ { CTL_INT, FS_DENTRY, "dentry-state" },
+ /* FS_NRSUPER unused */
+ /* FS_MAXUPSER unused */
+ { CTL_INT, FS_OVERFLOWUID, "overflowuid" },
+ { CTL_INT, FS_OVERFLOWGID, "overflowgid" },
+ { CTL_INT, FS_LEASES, "leases-enable" },
+ { CTL_INT, FS_DIR_NOTIFY, "dir-notify-enable" },
+ { CTL_INT, FS_LEASE_TIME, "lease-break-time" },
+ { CTL_DIR, FS_DQSTATS, "quota", bin_fs_quota_table },
+ { CTL_DIR, FS_XFS, "xfs", bin_fs_xfs_table },
+ { CTL_ULONG, FS_AIO_NR, "aio-nr" },
+ { CTL_ULONG, FS_AIO_MAX_NR, "aio-max-nr" },
+ { CTL_DIR, FS_INOTIFY, "inotify", bin_inotify_table },
+ { CTL_DIR, FS_OCFS2, "ocfs2", bin_fs_ocfs2_table },
+ { CTL_INT, KERN_SETUID_DUMPABLE, "suid_dumpable" }, /* NOTE(review): KERN_-prefixed constant inside the fs table - confirm intended */
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_ipmi_table[] = { /* child table of "ipmi" in bin_dev_table */
+ { CTL_INT, DEV_IPMI_POWEROFF_POWERCYCLE, "poweroff_powercycle" },
+ {}
+};
+
+static const struct bin_table bin_mac_hid_files[] = { /* child table of "mac_hid" in bin_dev_table */
+ /* DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES unused */
+ /* DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES unused */
+ { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON_EMULATION, "mouse_button_emulation" },
+ { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE, "mouse_button2_keycode" },
+ { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE, "mouse_button3_keycode" },
+ /* DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES unused */
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_raid_table[] = { /* child table of "raid" in bin_dev_table */
+ { CTL_INT, DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min" },
+ { CTL_INT, DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max" },
+ {}
+};
+
+static const struct bin_table bin_scsi_table[] = { /* child table of "scsi" in bin_dev_table */
+ { CTL_INT, DEV_SCSI_LOGGING_LEVEL, "logging_level" },
+ {}
+};
+
+static const struct bin_table bin_dev_table[] = { /* child table of "dev" in bin_root_table */
+ /* DEV_CDROM "cdrom" no longer used */
+ /* DEV_HWMON unused */
+ /* DEV_PARPORT "parport" no longer used */
+ { CTL_DIR, DEV_RAID, "raid", bin_raid_table },
+ { CTL_DIR, DEV_MAC_HID, "mac_hid", bin_mac_hid_files },
+ { CTL_DIR, DEV_SCSI, "scsi", bin_scsi_table },
+ { CTL_DIR, DEV_IPMI, "ipmi", bin_ipmi_table },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_bus_isa_table[] = { /* child table of "isa" in bin_bus_table */
+ { CTL_INT, BUS_ISA_MEM_BASE, "membase" },
+ { CTL_INT, BUS_ISA_PORT_BASE, "portbase" },
+ { CTL_INT, BUS_ISA_PORT_SHIFT, "portshift" },
+ {}
+};
+
+static const struct bin_table bin_bus_table[] = { /* child table of "bus" in bin_root_table */
+ { CTL_DIR, CTL_BUS_ISA, "isa", bin_bus_isa_table },
+ {}
+};
+
+
+static const struct bin_table bin_s390dbf_table[] = { /* child table of "s390dbf" in bin_root_table; numbers are literals with the constant name in a comment */
+ { CTL_INT, 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" },
+ { CTL_INT, 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" },
+ {}
+};
+
+static const struct bin_table bin_sunrpc_table[] = { /* child table of "sunrpc" in bin_root_table */
+ /* CTL_RPCDEBUG "rpc_debug" no longer used */
+ /* CTL_NFSDEBUG "nfs_debug" no longer used */
+ /* CTL_NFSDDEBUG "nfsd_debug" no longer used */
+ /* CTL_NLMDEBUG "nlm_debug" no longer used */
+
+ { CTL_INT, CTL_SLOTTABLE_UDP, "udp_slot_table_entries" },
+ { CTL_INT, CTL_SLOTTABLE_TCP, "tcp_slot_table_entries" },
+ { CTL_INT, CTL_MIN_RESVPORT, "min_resvport" },
+ { CTL_INT, CTL_MAX_RESVPORT, "max_resvport" },
+ {} /* empty entry ends the table */
+};
+
+static const struct bin_table bin_pm_table[] = { /* child table of "pm" in bin_root_table; numbers are literals with the constant name in a comment */
+ /* frv specific */
+ /* 1 == CTL_PM_SUSPEND "suspend" no longer used */
+ { CTL_INT, 2 /* CTL_PM_CMODE */, "cmode" },
+ { CTL_INT, 3 /* CTL_PM_P0 */, "p0" },
+ { CTL_INT, 4 /* CTL_PM_CM */, "cm" },
+ {}
+};
+
+static const struct bin_table bin_root_table[] = { /* top-level table: each entry is a CTL_DIR naming a subtree and, except "abi", its child table */
+ { CTL_DIR, CTL_KERN, "kernel", bin_kern_table },
+ { CTL_DIR, CTL_VM, "vm", bin_vm_table },
+ { CTL_DIR, CTL_NET, "net", bin_net_table },
+ /* CTL_PROC not used */
+ { CTL_DIR, CTL_FS, "fs", bin_fs_table },
+ /* CTL_DEBUG "debug" no longer used */
+ { CTL_DIR, CTL_DEV, "dev", bin_dev_table },
+ { CTL_DIR, CTL_BUS, "bus", bin_bus_table },
+ { CTL_DIR, CTL_ABI, "abi" }, /* no child table listed for this directory */
+ /* CTL_CPU not used */
+ /* CTL_ARLAN "arlan" no longer used */
+ { CTL_DIR, CTL_S390DBF, "s390dbf", bin_s390dbf_table },
+ { CTL_DIR, CTL_SUNRPC, "sunrpc", bin_sunrpc_table },
+ { CTL_DIR, CTL_PM, "pm", bin_pm_table },
+ {} /* empty entry ends the table */
+};
+
+/*
+ * bin_dir - value handler that rejects every request.
+ *
+ * None of the arguments are examined; the result is always -ENOTDIR.
+ * Presumably this is the handler behind CTL_DIR table entries (the macro
+ * definition is not visible in this part of the file).
+ */
+static ssize_t bin_dir(struct file *file,
+		       void __user *oldval, size_t oldlen,
+		       void __user *newval, size_t newlen)
+{
+	return -ENOTDIR;
+}
+
+
+/*
+ * bin_string - read and/or write a string value through an open file.
+ *
+ * @file:   open file to transfer the value through
+ * @oldval: user buffer receiving the current value (may be NULL)
+ * @oldlen: size of @oldval in bytes
+ * @newval: user buffer holding the new value (may be NULL)
+ * @newlen: number of bytes to write from @newval
+ *
+ * When @oldval/@oldlen are given, up to @oldlen bytes are read from offset 0
+ * straight into the user buffer and a single trailing newline, if present,
+ * is overwritten with a NUL and not counted.  When @newval/@newlen are
+ * given, the bytes are written to the file from offset 0.
+ *
+ * Returns the number of bytes left in @oldval (0 if nothing was read or no
+ * old value was requested), or a negative errno from vfs_read()/vfs_write(),
+ * or -EFAULT if the user buffer cannot be accessed.
+ */
+static ssize_t bin_string(struct file *file,
+	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
+{
+	ssize_t result, copied = 0;
+
+	if (oldval && oldlen) {
+		loff_t pos = 0;
+
+		result = vfs_read(file, oldval, oldlen, &pos);
+		if (result < 0)
+			goto out;
+
+		copied = result;
+
+		/*
+		 * Only look at the last byte when something was actually
+		 * read: with an empty read "oldval + copied - 1" would point
+		 * one byte in front of the caller's buffer.
+		 */
+		if (copied > 0) {
+			char __user *lastp = oldval + copied - 1;
+			int ch;
+
+			result = -EFAULT;
+			if (get_user(ch, lastp))
+				goto out;
+
+			/* Trim off the trailing newline */
+			if (ch == '\n') {
+				result = -EFAULT;
+				if (put_user('\0', lastp))
+					goto out;
+				copied -= 1;
+			}
+		}
+	}
+
+	if (newval && newlen) {
+		loff_t pos = 0;
+
+		result = vfs_write(file, newval, newlen, &pos);
+		if (result < 0)
+			goto out;
+	}
+
+	result = copied;
+out:
+	return result;
+}
+
+/*
+ * bin_intvec - convert a vector of unsigned ints to and from text
+ *
+ * If @oldval/@oldlen are given, up to BUFSZ - 1 bytes of text are read
+ * from @file, parsed as decimal numbers separated by white space and
+ * stored in @oldval as unsigned ints, at most @oldlen / sizeof(unsigned)
+ * of them.  Parsing stops early when the next character is not a digit.
+ *
+ * If @newval/@newlen are given, every unsigned int in @newval is
+ * formatted as "%lu\t" into a BUFSZ byte buffer which is then written
+ * to @file.
+ *
+ * Returns the number of bytes stored in @oldval or a negative errno.
+ */
+static ssize_t bin_intvec(struct file *file,
+	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
+{
+	mm_segment_t old_fs = get_fs();
+	ssize_t copied = 0;
+	char *buffer;
+	ssize_t result;
+
+	result = -ENOMEM;
+	buffer = kmalloc(BUFSZ, GFP_KERNEL);
+	if (!buffer)
+		goto out;
+
+	if (oldval && oldlen) {
+		unsigned __user *vec = oldval;
+		size_t length = oldlen / sizeof(*vec);
+		loff_t pos = 0;
+		char *str, *end;
+		int i;
+
+		/* buffer is kernel memory, so widen the address limit */
+		set_fs(KERNEL_DS);
+		result = vfs_read(file, buffer, BUFSZ - 1, &pos);
+		set_fs(old_fs);
+		if (result < 0)
+			goto out_kfree;
+
+		/* The read was capped at BUFSZ - 1, leaving room for the NUL */
+		str = buffer;
+		end = str + result;
+		*end++ = '\0';
+		for (i = 0; i < length; i++) {
+			unsigned long value;
+
+			value = simple_strtoul(str, &str, 10);
+			while (isspace(*str))
+				str++;
+
+			result = -EFAULT;
+			if (put_user(value, vec + i))
+				goto out_kfree;
+
+			copied += sizeof(*vec);
+			if (!isdigit(*str))
+				break;
+		}
+	}
+
+	if (newval && newlen) {
+		unsigned __user *vec = newval;
+		size_t length = newlen / sizeof(*vec);
+		loff_t pos = 0;
+		char *str, *end;
+		int i;
+
+		str = buffer;
+		end = str + BUFSZ;
+		for (i = 0; i < length; i++) {
+			unsigned long value;
+
+			result = -EFAULT;
+			if (get_user(value, vec + i))
+				goto out_kfree;
+
+			/*
+			 * scnprintf() returns what was really stored, so str
+			 * can never move past end.  snprintf() returns the
+			 * untruncated length instead, which for a long vector
+			 * pushed str beyond the buffer and made end - str wrap.
+			 */
+			str += scnprintf(str, end - str, "%lu\t", value);
+		}
+
+		set_fs(KERNEL_DS);
+		result = vfs_write(file, buffer, str - buffer, &pos);
+		set_fs(old_fs);
+		if (result < 0)
+			goto out_kfree;
+	}
+	result = copied;
+out_kfree:
+	kfree(buffer);
+out:
+	return result;
+}
+
+/*
+ * bin_ulongvec - convert a vector of unsigned longs to and from text
+ *
+ * If @oldval/@oldlen are given, up to BUFSZ - 1 bytes of text are read
+ * from @file, parsed as decimal numbers separated by white space and
+ * stored in @oldval as unsigned longs, at most
+ * @oldlen / sizeof(unsigned long) of them.  Parsing stops early when the
+ * next character is not a digit.
+ *
+ * If @newval/@newlen are given, every unsigned long in @newval is
+ * formatted as "%lu\t" into a BUFSZ byte buffer which is then written
+ * to @file.
+ *
+ * Returns the number of bytes stored in @oldval or a negative errno.
+ */
+static ssize_t bin_ulongvec(struct file *file,
+	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
+{
+	mm_segment_t old_fs = get_fs();
+	ssize_t copied = 0;
+	char *buffer;
+	ssize_t result;
+
+	result = -ENOMEM;
+	buffer = kmalloc(BUFSZ, GFP_KERNEL);
+	if (!buffer)
+		goto out;
+
+	if (oldval && oldlen) {
+		unsigned long __user *vec = oldval;
+		size_t length = oldlen / sizeof(*vec);
+		loff_t pos = 0;
+		char *str, *end;
+		int i;
+
+		/* buffer is kernel memory, so widen the address limit */
+		set_fs(KERNEL_DS);
+		result = vfs_read(file, buffer, BUFSZ - 1, &pos);
+		set_fs(old_fs);
+		if (result < 0)
+			goto out_kfree;
+
+		/* The read was capped at BUFSZ - 1, leaving room for the NUL */
+		str = buffer;
+		end = str + result;
+		*end++ = '\0';
+		for (i = 0; i < length; i++) {
+			unsigned long value;
+
+			value = simple_strtoul(str, &str, 10);
+			while (isspace(*str))
+				str++;
+
+			result = -EFAULT;
+			if (put_user(value, vec + i))
+				goto out_kfree;
+
+			copied += sizeof(*vec);
+			if (!isdigit(*str))
+				break;
+		}
+	}
+
+	if (newval && newlen) {
+		unsigned long __user *vec = newval;
+		size_t length = newlen / sizeof(*vec);
+		loff_t pos = 0;
+		char *str, *end;
+		int i;
+
+		str = buffer;
+		end = str + BUFSZ;
+		for (i = 0; i < length; i++) {
+			unsigned long value;
+
+			result = -EFAULT;
+			if (get_user(value, vec + i))
+				goto out_kfree;
+
+			/*
+			 * scnprintf() returns what was really stored, so str
+			 * can never move past end.  snprintf() returns the
+			 * untruncated length instead, which for a long vector
+			 * pushed str beyond the buffer and made end - str wrap.
+			 */
+			str += scnprintf(str, end - str, "%lu\t", value);
+		}
+
+		set_fs(KERNEL_DS);
+		result = vfs_write(file, buffer, str - buffer, &pos);
+		set_fs(old_fs);
+		if (result < 0)
+			goto out_kfree;
+	}
+	result = copied;
+out_kfree:
+	kfree(buffer);
+out:
+	return result;
+}
+
+/*
+ * Numeric value of a single hexadecimal digit character.  The result is
+ * only meaningful for characters that satisfy isxdigit().
+ */
+static unsigned hex_value(int ch)
+{
+	if (isdigit(ch))
+		return ch - '0';
+
+	/* Setting bit 0x20 folds 'A'-'F' onto 'a'-'f' */
+	return ((ch | 0x20) - 'a') + 10;
+}
+
+/*
+ * bin_uuid - hand a textual uuid back to the caller in binary form
+ *
+ * Reads up to 39 characters from @file, decodes 16 bytes from pairs of
+ * hex digits (a '-' after a pair is skipped) and copies at most 16 of
+ * them to @oldval.  @newval/@newlen are ignored: writes are not
+ * supported.
+ *
+ * Returns the number of bytes copied, 0 when no old value was asked for,
+ * -EIO when the text is not made of hex digit pairs, or another negative
+ * errno.
+ */
+static ssize_t bin_uuid(struct file *file,
+	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
+{
+	mm_segment_t saved_fs = get_fs();
+	unsigned char raw[16];
+	char text[40];
+	const char *cursor = text;
+	loff_t offset = 0;
+	ssize_t nread;
+	int idx;
+
+	/* Only supports reads */
+	if (!oldval || !oldlen)
+		return 0;
+
+	set_fs(KERNEL_DS);
+	nread = vfs_read(file, text, sizeof(text) - 1, &offset);
+	set_fs(saved_fs);
+	if (nread < 0)
+		return nread;
+
+	/* Terminate the text so the parser stops at the end of the data */
+	text[nread] = '\0';
+
+	/* Convert the uuid from a string to binary */
+	for (idx = 0; idx < 16; idx++) {
+		if (!isxdigit(cursor[0]) || !isxdigit(cursor[1]))
+			return -EIO;
+
+		raw[idx] = (hex_value(cursor[0]) << 4) | hex_value(cursor[1]);
+		cursor += 2;
+		if (*cursor == '-')
+			cursor++;
+	}
+
+	if (oldlen > 16)
+		oldlen = 16;
+
+	if (copy_to_user(oldval, raw, oldlen))
+		return -EFAULT;
+
+	return oldlen;
+}
+
+/*
+ * bin_dn_node_address - convert a decnet node address to and from text
+ *
+ * If @oldval/@oldlen are given, text of the form "area.node" is read from
+ * @file and stored in @oldval as a little endian 16 bit value,
+ * (area << 10) | node.  Text without a '.', an area above 63 or a node
+ * above 1023 yields -EIO.
+ *
+ * If @newval/@newlen are given, @newlen must be exactly the size of that
+ * 16 bit value (otherwise -EINVAL); the value is formatted back into
+ * "area.node" and written to @file.
+ *
+ * Returns the number of bytes stored in @oldval or a negative errno.
+ */
+static ssize_t bin_dn_node_address(struct file *file,
+	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
+{
+	mm_segment_t old_fs = get_fs();
+	ssize_t result, copied = 0;
+
+	if (oldval && oldlen) {
+		loff_t pos = 0;
+		char buf[15], *nodep;
+		unsigned long area, node;
+		__le16 dnaddr;
+
+		set_fs(KERNEL_DS);
+		result = vfs_read(file, buf, sizeof(buf) - 1, &pos);
+		set_fs(old_fs);
+		if (result < 0)
+			goto out;
+
+		buf[result] = '\0';
+
+		/* Convert the decnet address to binary */
+		result = -EIO;
+		/*
+		 * Test the strchr() result before stepping over the '.':
+		 * NULL + 1 is not NULL, so checking afterwards would let a
+		 * string without a separator through.
+		 */
+		nodep = strchr(buf, '.');
+		if (!nodep)
+			goto out;
+		++nodep;
+
+		area = simple_strtoul(buf, NULL, 10);
+		node = simple_strtoul(nodep, NULL, 10);
+
+		result = -EIO;
+		if ((area > 63)||(node > 1023))
+			goto out;
+
+		dnaddr = cpu_to_le16((area << 10) | node);
+
+		result = -EFAULT;
+		if (put_user(dnaddr, (__le16 __user *)oldval))
+			goto out;
+
+		copied = sizeof(dnaddr);
+	}
+
+	if (newval && newlen) {
+		loff_t pos = 0;
+		__le16 dnaddr;
+		char buf[15];
+		int len;
+
+		result = -EINVAL;
+		if (newlen != sizeof(dnaddr))
+			goto out;
+
+		result = -EFAULT;
+		if (get_user(dnaddr, (__le16 __user *)newval))
+			goto out;
+
+		/* Upper 6 bits are the area, lower 10 bits the node */
+		len = snprintf(buf, sizeof(buf), "%hu.%hu",
+				le16_to_cpu(dnaddr) >> 10,
+				le16_to_cpu(dnaddr) & 0x3ff);
+
+		set_fs(KERNEL_DS);
+		result = vfs_write(file, buf, len, &pos);
+		set_fs(old_fs);
+		if (result < 0)
+			goto out;
+	}
+
+	result = copied;
+out:
+	return result;
+}
+
+/*
+ * get_sysctl - translate a binary sysctl name into a path and table entry
+ * @name: numeric name components
+ * @nlen: number of components in @name
+ * @path: buffer that receives the textual path
+ *
+ * Walks the tables starting at bin_root_table, consuming one component of
+ * @name per level.  @path is filled with "sys/" followed by the matched
+ * names joined by '/', and is NUL terminated only on success.
+ *
+ * Returns the first matched entry that has no child table, or
+ * ERR_PTR(-ENOTDIR) when @name runs out before such an entry is reached
+ * or a component matches nothing in the current table.
+ */
+static const struct bin_table *get_sysctl(const int *name, int nlen, char *path)
+{
+ const struct bin_table *table = &bin_root_table[0];
+ int ctl_name;
+
+ /* The binary sysctl tables have a small maximum depth so
+ * there is no danger of overflowing our path as it is PATH_MAX
+ * bytes long.
+ */
+ memcpy(path, "sys/", 4);
+ path += 4;
+
+ /* One pass per name component; table points at the current level */
+repeat:
+ if (!nlen)
+ return ERR_PTR(-ENOTDIR);
+ ctl_name = *name;
+ name++;
+ nlen--;
+ /* An entry without a convert routine marks the end of a table */
+ for ( ; table->convert; table++) {
+ int len = 0;
+
+ /*
+ * For a wild card entry map from ifindex to network
+ * device name.
+ */
+ if (!table->ctl_name) {
+#ifdef CONFIG_NET
+ struct net *net = current->nsproxy->net_ns;
+ struct net_device *dev;
+ dev = dev_get_by_index(net, ctl_name);
+ if (dev) {
+ len = strlen(dev->name);
+ memcpy(path, dev->name, len);
+ dev_put(dev);
+ }
+#endif
+ /* Use the well known sysctl number to proc name mapping */
+ } else if (ctl_name == table->ctl_name) {
+ len = strlen(table->procname);
+ memcpy(path, table->procname, len);
+ }
+ /* A non-zero len means this entry matched and its name was copied */
+ if (len) {
+ path += len;
+ if (table->child) {
+ /* Descend: the next component is looked up in the child */
+ *path++ = '/';
+ table = table->child;
+ goto repeat;
+ }
+ *path = '\0';
+ return table;
+ }
+ }
+ return ERR_PTR(-ENOTDIR);
+}
+
+/*
+ * sysctl_getname - build the path for a binary sysctl name
+ *
+ * Allocates a name buffer, lets get_sysctl() fill it in and stores the
+ * table entry get_sysctl() returned (which may be an error pointer) in
+ * @tablep.  On success the buffer is returned and the caller owns it.
+ * If get_sysctl() fails the buffer is released and its error is passed
+ * on; if no buffer can be had ERR_PTR(-ENOMEM) is returned and @tablep
+ * is left untouched.
+ */
+static char *sysctl_getname(const int *name, int nlen, const struct bin_table **tablep)
+{
+	const struct bin_table *entry;
+	char *pathbuf;
+
+	pathbuf = __getname();
+	if (!pathbuf)
+		return ERR_PTR(-ENOMEM);
+
+	entry = get_sysctl(name, nlen, pathbuf);
+	*tablep = entry;
+	if (!IS_ERR(entry))
+		return pathbuf;
+
+	__putname(pathbuf);
+	return ERR_CAST(entry);
+}
+
+/*
+ * binary_sysctl - carry out a binary sysctl request through a file
+ * @name: numeric sysctl name, @nlen components long
+ * @oldval/@oldlen: user buffer for the current value, if wanted
+ * @newval/@newlen: user buffer holding a new value, if any
+ *
+ * Translates @name into a path, looks it up below the proc mount of the
+ * caller's pid namespace, opens it for reading and/or writing depending
+ * on which buffers were supplied and lets the table entry's convert
+ * routine do the transfer.
+ *
+ * Returns what the convert routine returns, 0 when neither buffer was
+ * supplied, or a negative errno.
+ */
+static ssize_t binary_sysctl(const int *name, int nlen,
+	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
+{
+	const struct bin_table *table = NULL;
+	struct nameidata nd;
+	struct vfsmount *mnt;
+	struct file *file;
+	ssize_t result;
+	char *pathname;
+	int flags;
+	int acc_mode, fmode;
+
+	pathname = sysctl_getname(name, nlen, &table);
+	result = PTR_ERR(pathname);
+	if (IS_ERR(pathname))
+		goto out;
+
+	/* How should the sysctl be accessed? */
+	if (oldval && oldlen && newval && newlen) {
+		flags = O_RDWR;
+		acc_mode = MAY_READ | MAY_WRITE;
+		fmode = FMODE_READ | FMODE_WRITE;
+	} else if (newval && newlen) {
+		flags = O_WRONLY;
+		acc_mode = MAY_WRITE;
+		fmode = FMODE_WRITE;
+	} else if (oldval && oldlen) {
+		flags = O_RDONLY;
+		acc_mode = MAY_READ;
+		fmode = FMODE_READ;
+	} else {
+		result = 0;
+		goto out_putname;
+	}
+
+	mnt = current->nsproxy->pid_ns->proc_mnt;
+	result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd);
+	if (result)
+		goto out_putname;
+
+	result = may_open(&nd.path, acc_mode, fmode);
+	if (result)
+		goto out_putpath;
+
+	/*
+	 * NOTE(review): no path_put() follows dentry_open(), on success or
+	 * on failure; this assumes dentry_open() takes over the path
+	 * references in both cases - confirm.
+	 */
+	file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
+	result = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out_putname;
+
+	result = table->convert(file, oldval, oldlen, newval, newlen);
+
+	fput(file);
+out_putname:
+	/*
+	 * The buffer came from __getname() in sysctl_getname(), so release
+	 * it with the matching __putname(), as sysctl_getname() itself does
+	 * on its error path.
+	 */
+	__putname(pathname);
+out:
+	return result;
+
+out_putpath:
+	path_put(&nd.path);
+	goto out_putname;
+}
+
+
+#else /* CONFIG_SYSCTL_SYSCALL */
+
+/*
+ * Stand-in used when CONFIG_SYSCTL_SYSCALL is not set: no request is
+ * looked at, every call fails with -ENOSYS.
+ */
+static ssize_t binary_sysctl(const int *name, int nlen,
+	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
+{
+	const ssize_t err = -ENOSYS;
+
+	return err;
+}
+
+#endif /* CONFIG_SYSCTL_SYSCALL */
+
+
+/*
+ * deprecated_sysctl_warning - log that a process used the sysctl syscall
+ * @name: numeric sysctl name
+ * @nlen: number of valid entries in @name
+ *
+ * Prints a rate limited message naming the current process and the
+ * numeric name it asked for.  Requests for CTL_KERN/KERN_VERSION are
+ * never reported.
+ */
+static void deprecated_sysctl_warning(const int *name, int nlen)
+{
+	int i;
+
+	/*
+	 * CTL_KERN/KERN_VERSION is used by older glibc and cannot
+	 * ever go away.
+	 *
+	 * Only the first nlen entries of name hold data from the caller,
+	 * so make sure both entries exist before looking at them.
+	 */
+	if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
+		return;
+
+	if (printk_ratelimit()) {
+		printk(KERN_INFO
+			"warning: process `%s' used the deprecated sysctl "
+			"system call with ", current->comm);
+		for (i = 0; i < nlen; i++)
+			printk("%d.", name[i]);
+		printk("\n");
+	}
+}
+
+/*
+ * do_sysctl - common back end of the native and compat sysctl syscalls
+ *
+ * Rejects a name length outside 0..CTL_MAXNAME with -ENOTDIR, copies the
+ * numeric name in from user space (-EFAULT on failure), emits the
+ * deprecation warning and passes the request on to binary_sysctl(),
+ * whose result is returned.
+ */
+static ssize_t do_sysctl(int __user *args_name, int nlen,
+	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
+{
+	int kname[CTL_MAXNAME];
+	int idx = 0;
+
+	/* Check args->nlen. */
+	if (nlen < 0 || nlen > CTL_MAXNAME)
+		return -ENOTDIR;
+
+	/* Read in the sysctl name for simplicity */
+	while (idx < nlen) {
+		if (get_user(kname[idx], args_name + idx))
+			return -EFAULT;
+		idx++;
+	}
+
+	deprecated_sysctl_warning(kname, nlen);
+
+	return binary_sysctl(kname, nlen, oldval, oldlen, newval, newlen);
+}
+
+/*
+ * sysctl system call entry point.
+ *
+ * Copies the argument block in, fetches the caller's old value length
+ * when a length pointer is given and hands the request to do_sysctl().
+ * A non-negative result from do_sysctl() is the number of bytes
+ * returned: it becomes the length written back through oldlenp and the
+ * call itself returns 0.  On error the length originally read is
+ * written back unchanged and the error is returned.
+ */
+SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
+{
+	struct __sysctl_args kargs;
+	size_t oldlen = 0;
+	ssize_t ret;
+
+	if (copy_from_user(&kargs, args, sizeof(kargs)))
+		return -EFAULT;
+
+	/* An old value buffer is useless without somewhere to put its length */
+	if (kargs.oldval && !kargs.oldlenp)
+		return -EFAULT;
+
+	if (kargs.oldlenp) {
+		if (get_user(oldlen, kargs.oldlenp))
+			return -EFAULT;
+	}
+
+	ret = do_sysctl(kargs.name, kargs.nlen, kargs.oldval, oldlen,
+			kargs.newval, kargs.newlen);
+	if (ret >= 0) {
+		oldlen = ret;
+		ret = 0;
+	}
+
+	if (kargs.oldlenp) {
+		if (put_user(oldlen, kargs.oldlenp))
+			return -EFAULT;
+	}
+
+	return ret;
+}
+
+
+#ifdef CONFIG_COMPAT
+#include <asm/compat.h>
+
+/*
+ * Argument block of the sysctl system call as passed in by a compat
+ * task.  Pointers arrive as compat_uptr_t and are turned into real user
+ * pointers with compat_ptr() in compat_sys_sysctl().
+ */
+struct compat_sysctl_args {
+ compat_uptr_t name; /* user pointer to the numeric name (ints) */
+ int nlen; /* number of entries in name */
+ compat_uptr_t oldval; /* user buffer for the current value */
+ compat_uptr_t oldlenp; /* user pointer to a compat_size_t length */
+ compat_uptr_t newval; /* user buffer holding the new value */
+ compat_size_t newlen; /* size of the new value */
+ compat_ulong_t __unused[4]; /* not read by compat_sys_sysctl() */
+};
+
+/*
+ * Compat entry point of the sysctl system call.
+ *
+ * Same flow as the native call, except that the argument block uses the
+ * compat layout: every pointer goes through compat_ptr() and the old
+ * value length is read and written as a compat_size_t.  A non-negative
+ * result from do_sysctl() becomes the length written back through
+ * oldlenp and the call returns 0; on error the length originally read is
+ * written back unchanged and the error is returned.
+ */
+asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args)
+{
+	struct compat_sysctl_args kargs;
+	compat_size_t __user *lenp;
+	size_t oldlen = 0;
+	ssize_t ret;
+
+	if (copy_from_user(&kargs, args, sizeof(kargs)))
+		return -EFAULT;
+
+	/* An old value buffer is useless without somewhere to put its length */
+	if (kargs.oldval && !kargs.oldlenp)
+		return -EFAULT;
+
+	lenp = compat_ptr(kargs.oldlenp);
+	if (lenp) {
+		if (get_user(oldlen, lenp))
+			return -EFAULT;
+	}
+
+	ret = do_sysctl(compat_ptr(kargs.name), kargs.nlen,
+			compat_ptr(kargs.oldval), oldlen,
+			compat_ptr(kargs.newval), kargs.newlen);
+	if (ret >= 0) {
+		oldlen = ret;
+		ret = 0;
+	}
+
+	if (lenp) {
+		if (put_user(oldlen, lenp))
+			return -EFAULT;
+	}
+
+	return ret;
+}
+
+#endif /* CONFIG_COMPAT */
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index b6e7aaea4604..04cdcf72c827 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -5,1239 +5,6 @@
#include <linux/string.h>
#include <net/ip_vs.h>
-struct trans_ctl_table {
- int ctl_name;
- const char *procname;
- const struct trans_ctl_table *child;
-};
-
-static const struct trans_ctl_table trans_random_table[] = {
- { RANDOM_POOLSIZE, "poolsize" },
- { RANDOM_ENTROPY_COUNT, "entropy_avail" },
- { RANDOM_READ_THRESH, "read_wakeup_threshold" },
- { RANDOM_WRITE_THRESH, "write_wakeup_threshold" },
- { RANDOM_BOOT_ID, "boot_id" },
- { RANDOM_UUID, "uuid" },
- {}
-};
-
-static const struct trans_ctl_table trans_pty_table[] = {
- { PTY_MAX, "max" },
- { PTY_NR, "nr" },
- {}
-};
-
-static const struct trans_ctl_table trans_kern_table[] = {
- { KERN_OSTYPE, "ostype" },
- { KERN_OSRELEASE, "osrelease" },
- /* KERN_OSREV not used */
- { KERN_VERSION, "version" },
- /* KERN_SECUREMASK not used */
- /* KERN_PROF not used */
- { KERN_NODENAME, "hostname" },
- { KERN_DOMAINNAME, "domainname" },
-
- { KERN_PANIC, "panic" },
- { KERN_REALROOTDEV, "real-root-dev" },
-
- { KERN_SPARC_REBOOT, "reboot-cmd" },
- { KERN_CTLALTDEL, "ctrl-alt-del" },
- { KERN_PRINTK, "printk" },
-
- /* KERN_NAMETRANS not used */
- /* KERN_PPC_HTABRECLAIM not used */
- /* KERN_PPC_ZEROPAGED not used */
- { KERN_PPC_POWERSAVE_NAP, "powersave-nap" },
-
- { KERN_MODPROBE, "modprobe" },
- { KERN_SG_BIG_BUFF, "sg-big-buff" },
- { KERN_ACCT, "acct" },
- { KERN_PPC_L2CR, "l2cr" },
-
- /* KERN_RTSIGNR not used */
- /* KERN_RTSIGMAX not used */
-
- { KERN_SHMMAX, "shmmax" },
- { KERN_MSGMAX, "msgmax" },
- { KERN_MSGMNB, "msgmnb" },
- /* KERN_MSGPOOL not used*/
- { KERN_SYSRQ, "sysrq" },
- { KERN_MAX_THREADS, "threads-max" },
- { KERN_RANDOM, "random", trans_random_table },
- { KERN_SHMALL, "shmall" },
- { KERN_MSGMNI, "msgmni" },
- { KERN_SEM, "sem" },
- { KERN_SPARC_STOP_A, "stop-a" },
- { KERN_SHMMNI, "shmmni" },
-
- { KERN_OVERFLOWUID, "overflowuid" },
- { KERN_OVERFLOWGID, "overflowgid" },
-
- { KERN_HOTPLUG, "hotplug", },
- { KERN_IEEE_EMULATION_WARNINGS, "ieee_emulation_warnings" },
-
- { KERN_S390_USER_DEBUG_LOGGING, "userprocess_debug" },
- { KERN_CORE_USES_PID, "core_uses_pid" },
- { KERN_TAINTED, "tainted" },
- { KERN_CADPID, "cad_pid" },
- { KERN_PIDMAX, "pid_max" },
- { KERN_CORE_PATTERN, "core_pattern" },
- { KERN_PANIC_ON_OOPS, "panic_on_oops" },
- { KERN_HPPA_PWRSW, "soft-power" },
- { KERN_HPPA_UNALIGNED, "unaligned-trap" },
-
- { KERN_PRINTK_RATELIMIT, "printk_ratelimit" },
- { KERN_PRINTK_RATELIMIT_BURST, "printk_ratelimit_burst" },
-
- { KERN_PTY, "pty", trans_pty_table },
- { KERN_NGROUPS_MAX, "ngroups_max" },
- { KERN_SPARC_SCONS_PWROFF, "scons-poweroff" },
- { KERN_HZ_TIMER, "hz_timer" },
- { KERN_UNKNOWN_NMI_PANIC, "unknown_nmi_panic" },
- { KERN_BOOTLOADER_TYPE, "bootloader_type" },
- { KERN_RANDOMIZE, "randomize_va_space" },
-
- { KERN_SPIN_RETRY, "spin_retry" },
- { KERN_ACPI_VIDEO_FLAGS, "acpi_video_flags" },
- { KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" },
- { KERN_COMPAT_LOG, "compat-log" },
- { KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
- { KERN_NMI_WATCHDOG, "nmi_watchdog" },
- { KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
- {}
-};
-
-static const struct trans_ctl_table trans_vm_table[] = {
- { VM_OVERCOMMIT_MEMORY, "overcommit_memory" },
- { VM_PAGE_CLUSTER, "page-cluster" },
- { VM_DIRTY_BACKGROUND, "dirty_background_ratio" },
- { VM_DIRTY_RATIO, "dirty_ratio" },
- { VM_DIRTY_WB_CS, "dirty_writeback_centisecs" },
- { VM_DIRTY_EXPIRE_CS, "dirty_expire_centisecs" },
- { VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" },
- { VM_OVERCOMMIT_RATIO, "overcommit_ratio" },
- /* VM_PAGEBUF unused */
- { VM_HUGETLB_PAGES, "nr_hugepages" },
- { VM_SWAPPINESS, "swappiness" },
- { VM_LOWMEM_RESERVE_RATIO, "lowmem_reserve_ratio" },
- { VM_MIN_FREE_KBYTES, "min_free_kbytes" },
- { VM_MAX_MAP_COUNT, "max_map_count" },
- { VM_LAPTOP_MODE, "laptop_mode" },
- { VM_BLOCK_DUMP, "block_dump" },
- { VM_HUGETLB_GROUP, "hugetlb_shm_group" },
- { VM_VFS_CACHE_PRESSURE, "vfs_cache_pressure" },
- { VM_LEGACY_VA_LAYOUT, "legacy_va_layout" },
- /* VM_SWAP_TOKEN_TIMEOUT unused */
- { VM_DROP_PAGECACHE, "drop_caches" },
- { VM_PERCPU_PAGELIST_FRACTION, "percpu_pagelist_fraction" },
- { VM_ZONE_RECLAIM_MODE, "zone_reclaim_mode" },
- { VM_MIN_UNMAPPED, "min_unmapped_ratio" },
- { VM_PANIC_ON_OOM, "panic_on_oom" },
- { VM_VDSO_ENABLED, "vdso_enabled" },
- { VM_MIN_SLAB, "min_slab_ratio" },
-
- {}
-};
-
-static const struct trans_ctl_table trans_net_core_table[] = {
- { NET_CORE_WMEM_MAX, "wmem_max" },
- { NET_CORE_RMEM_MAX, "rmem_max" },
- { NET_CORE_WMEM_DEFAULT, "wmem_default" },
- { NET_CORE_RMEM_DEFAULT, "rmem_default" },
- /* NET_CORE_DESTROY_DELAY unused */
- { NET_CORE_MAX_BACKLOG, "netdev_max_backlog" },
- /* NET_CORE_FASTROUTE unused */
- { NET_CORE_MSG_COST, "message_cost" },
- { NET_CORE_MSG_BURST, "message_burst" },
- { NET_CORE_OPTMEM_MAX, "optmem_max" },
- /* NET_CORE_HOT_LIST_LENGTH unused */
- /* NET_CORE_DIVERT_VERSION unused */
- /* NET_CORE_NO_CONG_THRESH unused */
- /* NET_CORE_NO_CONG unused */
- /* NET_CORE_LO_CONG unused */
- /* NET_CORE_MOD_CONG unused */
- { NET_CORE_DEV_WEIGHT, "dev_weight" },
- { NET_CORE_SOMAXCONN, "somaxconn" },
- { NET_CORE_BUDGET, "netdev_budget" },
- { NET_CORE_AEVENT_ETIME, "xfrm_aevent_etime" },
- { NET_CORE_AEVENT_RSEQTH, "xfrm_aevent_rseqth" },
- { NET_CORE_WARNINGS, "warnings" },
- {},
-};
-
-static const struct trans_ctl_table trans_net_unix_table[] = {
- /* NET_UNIX_DESTROY_DELAY unused */
- /* NET_UNIX_DELETE_DELAY unused */
- { NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv4_route_table[] = {
- { NET_IPV4_ROUTE_FLUSH, "flush" },
- { NET_IPV4_ROUTE_MIN_DELAY, "min_delay" },
- { NET_IPV4_ROUTE_MAX_DELAY, "max_delay" },
- { NET_IPV4_ROUTE_GC_THRESH, "gc_thresh" },
- { NET_IPV4_ROUTE_MAX_SIZE, "max_size" },
- { NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
- { NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" },
- { NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval" },
- { NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" },
- { NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" },
- { NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" },
- { NET_IPV4_ROUTE_ERROR_COST, "error_cost" },
- { NET_IPV4_ROUTE_ERROR_BURST, "error_burst" },
- { NET_IPV4_ROUTE_GC_ELASTICITY, "gc_elasticity" },
- { NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" },
- { NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" },
- { NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" },
- { NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" },
- { NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = {
- { NET_IPV4_CONF_FORWARDING, "forwarding" },
- { NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding" },
-
- { NET_IPV4_CONF_PROXY_ARP, "proxy_arp" },
- { NET_IPV4_CONF_ACCEPT_REDIRECTS, "accept_redirects" },
- { NET_IPV4_CONF_SECURE_REDIRECTS, "secure_redirects" },
- { NET_IPV4_CONF_SEND_REDIRECTS, "send_redirects" },
- { NET_IPV4_CONF_SHARED_MEDIA, "shared_media" },
- { NET_IPV4_CONF_RP_FILTER, "rp_filter" },
- { NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, "accept_source_route" },
- { NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay" },
- { NET_IPV4_CONF_LOG_MARTIANS, "log_martians" },
- { NET_IPV4_CONF_TAG, "tag" },
- { NET_IPV4_CONF_ARPFILTER, "arp_filter" },
- { NET_IPV4_CONF_MEDIUM_ID, "medium_id" },
- { NET_IPV4_CONF_NOXFRM, "disable_xfrm" },
- { NET_IPV4_CONF_NOPOLICY, "disable_policy" },
- { NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version" },
-
- { NET_IPV4_CONF_ARP_ANNOUNCE, "arp_announce" },
- { NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" },
- { NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" },
- { NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" },
- { NET_IPV4_CONF_ARP_NOTIFY, "arp_notify" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv4_conf_table[] = {
- { NET_PROTO_CONF_ALL, "all", trans_net_ipv4_conf_vars_table },
- { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv4_conf_vars_table },
- { 0, NULL, trans_net_ipv4_conf_vars_table },
- {}
-};
-
-static const struct trans_ctl_table trans_net_neigh_vars_table[] = {
- { NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" },
- { NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" },
- { NET_NEIGH_APP_SOLICIT, "app_solicit" },
- { NET_NEIGH_RETRANS_TIME, "retrans_time" },
- { NET_NEIGH_REACHABLE_TIME, "base_reachable_time" },
- { NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time" },
- { NET_NEIGH_GC_STALE_TIME, "gc_stale_time" },
- { NET_NEIGH_UNRES_QLEN, "unres_qlen" },
- { NET_NEIGH_PROXY_QLEN, "proxy_qlen" },
- { NET_NEIGH_ANYCAST_DELAY, "anycast_delay" },
- { NET_NEIGH_PROXY_DELAY, "proxy_delay" },
- { NET_NEIGH_LOCKTIME, "locktime" },
- { NET_NEIGH_GC_INTERVAL, "gc_interval" },
- { NET_NEIGH_GC_THRESH1, "gc_thresh1" },
- { NET_NEIGH_GC_THRESH2, "gc_thresh2" },
- { NET_NEIGH_GC_THRESH3, "gc_thresh3" },
- { NET_NEIGH_RETRANS_TIME_MS, "retrans_time_ms" },
- { NET_NEIGH_REACHABLE_TIME_MS, "base_reachable_time_ms" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_neigh_table[] = {
- { NET_PROTO_CONF_DEFAULT, "default", trans_net_neigh_vars_table },
- { 0, NULL, trans_net_neigh_vars_table },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv4_netfilter_table[] = {
- { NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max" },
-
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "ip_conntrack_tcp_timeout_syn_sent" },
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, "ip_conntrack_tcp_timeout_syn_recv" },
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, "ip_conntrack_tcp_timeout_established" },
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, "ip_conntrack_tcp_timeout_fin_wait" },
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, "ip_conntrack_tcp_timeout_close_wait" },
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, "ip_conntrack_tcp_timeout_last_ack" },
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, "ip_conntrack_tcp_timeout_time_wait" },
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, "ip_conntrack_tcp_timeout_close" },
-
- { NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT, "ip_conntrack_udp_timeout" },
- { NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM, "ip_conntrack_udp_timeout_stream" },
- { NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT, "ip_conntrack_icmp_timeout" },
- { NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT, "ip_conntrack_generic_timeout" },
-
- { NET_IPV4_NF_CONNTRACK_BUCKETS, "ip_conntrack_buckets" },
- { NET_IPV4_NF_CONNTRACK_LOG_INVALID, "ip_conntrack_log_invalid" },
- { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, "ip_conntrack_tcp_timeout_max_retrans" },
- { NET_IPV4_NF_CONNTRACK_TCP_LOOSE, "ip_conntrack_tcp_loose" },
- { NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, "ip_conntrack_tcp_be_liberal" },
- { NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, "ip_conntrack_tcp_max_retrans" },
-
- { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, "ip_conntrack_sctp_timeout_closed" },
- { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, "ip_conntrack_sctp_timeout_cookie_wait" },
- { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, "ip_conntrack_sctp_timeout_cookie_echoed" },
- { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, "ip_conntrack_sctp_timeout_established" },
- { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, "ip_conntrack_sctp_timeout_shutdown_sent" },
- { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, "ip_conntrack_sctp_timeout_shutdown_recd" },
- { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, "ip_conntrack_sctp_timeout_shutdown_ack_sent" },
-
- { NET_IPV4_NF_CONNTRACK_COUNT, "ip_conntrack_count" },
- { NET_IPV4_NF_CONNTRACK_CHECKSUM, "ip_conntrack_checksum" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv4_table[] = {
- { NET_IPV4_FORWARD, "ip_forward" },
- { NET_IPV4_DYNADDR, "ip_dynaddr" },
-
- { NET_IPV4_CONF, "conf", trans_net_ipv4_conf_table },
- { NET_IPV4_NEIGH, "neigh", trans_net_neigh_table },
- { NET_IPV4_ROUTE, "route", trans_net_ipv4_route_table },
- /* NET_IPV4_FIB_HASH unused */
- { NET_IPV4_NETFILTER, "netfilter", trans_net_ipv4_netfilter_table },
-
- { NET_IPV4_TCP_TIMESTAMPS, "tcp_timestamps" },
- { NET_IPV4_TCP_WINDOW_SCALING, "tcp_window_scaling" },
- { NET_IPV4_TCP_SACK, "tcp_sack" },
- { NET_IPV4_TCP_RETRANS_COLLAPSE, "tcp_retrans_collapse" },
- { NET_IPV4_DEFAULT_TTL, "ip_default_ttl" },
- /* NET_IPV4_AUTOCONFIG unused */
- { NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc" },
- { NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries" },
- { NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh" },
- { NET_IPV4_IPFRAG_LOW_THRESH, "ipfrag_low_thresh" },
- { NET_IPV4_IPFRAG_TIME, "ipfrag_time" },
- /* NET_IPV4_TCP_MAX_KA_PROBES unused */
- { NET_IPV4_TCP_KEEPALIVE_TIME, "tcp_keepalive_time" },
- { NET_IPV4_TCP_KEEPALIVE_PROBES, "tcp_keepalive_probes" },
- { NET_IPV4_TCP_RETRIES1, "tcp_retries1" },
- { NET_IPV4_TCP_RETRIES2, "tcp_retries2" },
- { NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout" },
- /* NET_IPV4_IP_MASQ_DEBUG unused */
- { NET_TCP_SYNCOOKIES, "tcp_syncookies" },
- { NET_TCP_STDURG, "tcp_stdurg" },
- { NET_TCP_RFC1337, "tcp_rfc1337" },
- /* NET_TCP_SYN_TAILDROP unused */
- { NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog" },
- { NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range" },
- { NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all" },
- { NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts" },
- /* NET_IPV4_ICMP_SOURCEQUENCH_RATE unused */
- /* NET_IPV4_ICMP_DESTUNREACH_RATE unused */
- /* NET_IPV4_ICMP_TIMEEXCEED_RATE unused */
- /* NET_IPV4_ICMP_PARAMPROB_RATE unused */
- /* NET_IPV4_ICMP_ECHOREPLY_RATE unused */
- { NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, "icmp_ignore_bogus_error_responses" },
- { NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships" },
- { NET_TCP_TW_RECYCLE, "tcp_tw_recycle" },
- /* NET_IPV4_ALWAYS_DEFRAG unused */
- { NET_IPV4_TCP_KEEPALIVE_INTVL, "tcp_keepalive_intvl" },
- { NET_IPV4_INET_PEER_THRESHOLD, "inet_peer_threshold" },
- { NET_IPV4_INET_PEER_MINTTL, "inet_peer_minttl" },
- { NET_IPV4_INET_PEER_MAXTTL, "inet_peer_maxttl" },
- { NET_IPV4_INET_PEER_GC_MINTIME, "inet_peer_gc_mintime" },
- { NET_IPV4_INET_PEER_GC_MAXTIME, "inet_peer_gc_maxtime" },
- { NET_TCP_ORPHAN_RETRIES, "tcp_orphan_retries" },
- { NET_TCP_ABORT_ON_OVERFLOW, "tcp_abort_on_overflow" },
- { NET_TCP_SYNACK_RETRIES, "tcp_synack_retries" },
- { NET_TCP_MAX_ORPHANS, "tcp_max_orphans" },
- { NET_TCP_MAX_TW_BUCKETS, "tcp_max_tw_buckets" },
- { NET_TCP_FACK, "tcp_fack" },
- { NET_TCP_REORDERING, "tcp_reordering" },
- { NET_TCP_ECN, "tcp_ecn" },
- { NET_TCP_DSACK, "tcp_dsack" },
- { NET_TCP_MEM, "tcp_mem" },
- { NET_TCP_WMEM, "tcp_wmem" },
- { NET_TCP_RMEM, "tcp_rmem" },
- { NET_TCP_APP_WIN, "tcp_app_win" },
- { NET_TCP_ADV_WIN_SCALE, "tcp_adv_win_scale" },
- { NET_IPV4_NONLOCAL_BIND, "ip_nonlocal_bind" },
- { NET_IPV4_ICMP_RATELIMIT, "icmp_ratelimit" },
- { NET_IPV4_ICMP_RATEMASK, "icmp_ratemask" },
- { NET_TCP_TW_REUSE, "tcp_tw_reuse" },
- { NET_TCP_FRTO, "tcp_frto" },
- { NET_TCP_LOW_LATENCY, "tcp_low_latency" },
- { NET_IPV4_IPFRAG_SECRET_INTERVAL, "ipfrag_secret_interval" },
- { NET_IPV4_IGMP_MAX_MSF, "igmp_max_msf" },
- { NET_TCP_NO_METRICS_SAVE, "tcp_no_metrics_save" },
- /* NET_TCP_DEFAULT_WIN_SCALE unused */
- { NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" },
- { NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" },
- /* NET_TCP_BIC_BETA unused */
- { NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, "icmp_errors_use_inbound_ifaddr" },
- { NET_TCP_CONG_CONTROL, "tcp_congestion_control" },
- { NET_TCP_ABC, "tcp_abc" },
- { NET_IPV4_IPFRAG_MAX_DIST, "ipfrag_max_dist" },
- { NET_TCP_MTU_PROBING, "tcp_mtu_probing" },
- { NET_TCP_BASE_MSS, "tcp_base_mss" },
- { NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" },
- { NET_TCP_DMA_COPYBREAK, "tcp_dma_copybreak" },
- { NET_TCP_SLOW_START_AFTER_IDLE, "tcp_slow_start_after_idle" },
- { NET_CIPSOV4_CACHE_ENABLE, "cipso_cache_enable" },
- { NET_CIPSOV4_CACHE_BUCKET_SIZE, "cipso_cache_bucket_size" },
- { NET_CIPSOV4_RBM_OPTFMT, "cipso_rbm_optfmt" },
- { NET_CIPSOV4_RBM_STRICTVALID, "cipso_rbm_strictvalid" },
- { NET_TCP_AVAIL_CONG_CONTROL, "tcp_available_congestion_control" },
- { NET_TCP_ALLOWED_CONG_CONTROL, "tcp_allowed_congestion_control" },
- { NET_TCP_MAX_SSTHRESH, "tcp_max_ssthresh" },
- { NET_TCP_FRTO_RESPONSE, "tcp_frto_response" },
- { 2088 /* NET_IPQ_QMAX */, "ip_queue_maxlen" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipx_table[] = {
- { NET_IPX_PPROP_BROADCASTING, "ipx_pprop_broadcasting" },
- /* NET_IPX_FORWARDING unused */
- {}
-};
-
-static const struct trans_ctl_table trans_net_atalk_table[] = {
- { NET_ATALK_AARP_EXPIRY_TIME, "aarp-expiry-time" },
- { NET_ATALK_AARP_TICK_TIME, "aarp-tick-time" },
- { NET_ATALK_AARP_RETRANSMIT_LIMIT, "aarp-retransmit-limit" },
- { NET_ATALK_AARP_RESOLVE_TIME, "aarp-resolve-time" },
- {},
-};
-
-static const struct trans_ctl_table trans_net_netrom_table[] = {
- { NET_NETROM_DEFAULT_PATH_QUALITY, "default_path_quality" },
- { NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, "obsolescence_count_initialiser" },
- { NET_NETROM_NETWORK_TTL_INITIALISER, "network_ttl_initialiser" },
- { NET_NETROM_TRANSPORT_TIMEOUT, "transport_timeout" },
- { NET_NETROM_TRANSPORT_MAXIMUM_TRIES, "transport_maximum_tries" },
- { NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY, "transport_acknowledge_delay" },
- { NET_NETROM_TRANSPORT_BUSY_DELAY, "transport_busy_delay" },
- { NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE, "transport_requested_window_size" },
- { NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT, "transport_no_activity_timeout" },
- { NET_NETROM_ROUTING_CONTROL, "routing_control" },
- { NET_NETROM_LINK_FAILS_COUNT, "link_fails_count" },
- { NET_NETROM_RESET, "reset" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ax25_param_table[] = {
- { NET_AX25_IP_DEFAULT_MODE, "ip_default_mode" },
- { NET_AX25_DEFAULT_MODE, "ax25_default_mode" },
- { NET_AX25_BACKOFF_TYPE, "backoff_type" },
- { NET_AX25_CONNECT_MODE, "connect_mode" },
- { NET_AX25_STANDARD_WINDOW, "standard_window_size" },
- { NET_AX25_EXTENDED_WINDOW, "extended_window_size" },
- { NET_AX25_T1_TIMEOUT, "t1_timeout" },
- { NET_AX25_T2_TIMEOUT, "t2_timeout" },
- { NET_AX25_T3_TIMEOUT, "t3_timeout" },
- { NET_AX25_IDLE_TIMEOUT, "idle_timeout" },
- { NET_AX25_N2, "maximum_retry_count" },
- { NET_AX25_PACLEN, "maximum_packet_length" },
- { NET_AX25_PROTOCOL, "protocol" },
- { NET_AX25_DAMA_SLAVE_TIMEOUT, "dama_slave_timeout" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ax25_table[] = {
- { 0, NULL, trans_net_ax25_param_table },
- {}
-};
-
-static const struct trans_ctl_table trans_net_bridge_table[] = {
- { NET_BRIDGE_NF_CALL_ARPTABLES, "bridge-nf-call-arptables" },
- { NET_BRIDGE_NF_CALL_IPTABLES, "bridge-nf-call-iptables" },
- { NET_BRIDGE_NF_CALL_IP6TABLES, "bridge-nf-call-ip6tables" },
- { NET_BRIDGE_NF_FILTER_VLAN_TAGGED, "bridge-nf-filter-vlan-tagged" },
- { NET_BRIDGE_NF_FILTER_PPPOE_TAGGED, "bridge-nf-filter-pppoe-tagged" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_rose_table[] = {
- { NET_ROSE_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" },
- { NET_ROSE_CALL_REQUEST_TIMEOUT, "call_request_timeout" },
- { NET_ROSE_RESET_REQUEST_TIMEOUT, "reset_request_timeout" },
- { NET_ROSE_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" },
- { NET_ROSE_ACK_HOLD_BACK_TIMEOUT, "acknowledge_hold_back_timeout" },
- { NET_ROSE_ROUTING_CONTROL, "routing_control" },
- { NET_ROSE_LINK_FAIL_TIMEOUT, "link_fail_timeout" },
- { NET_ROSE_MAX_VCS, "maximum_virtual_circuits" },
- { NET_ROSE_WINDOW_SIZE, "window_size" },
- { NET_ROSE_NO_ACTIVITY_TIMEOUT, "no_activity_timeout" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv6_conf_var_table[] = {
- { NET_IPV6_FORWARDING, "forwarding" },
- { NET_IPV6_HOP_LIMIT, "hop_limit" },
- { NET_IPV6_MTU, "mtu" },
- { NET_IPV6_ACCEPT_RA, "accept_ra" },
- { NET_IPV6_ACCEPT_REDIRECTS, "accept_redirects" },
- { NET_IPV6_AUTOCONF, "autoconf" },
- { NET_IPV6_DAD_TRANSMITS, "dad_transmits" },
- { NET_IPV6_RTR_SOLICITS, "router_solicitations" },
- { NET_IPV6_RTR_SOLICIT_INTERVAL, "router_solicitation_interval" },
- { NET_IPV6_RTR_SOLICIT_DELAY, "router_solicitation_delay" },
- { NET_IPV6_USE_TEMPADDR, "use_tempaddr" },
- { NET_IPV6_TEMP_VALID_LFT, "temp_valid_lft" },
- { NET_IPV6_TEMP_PREFERED_LFT, "temp_prefered_lft" },
- { NET_IPV6_REGEN_MAX_RETRY, "regen_max_retry" },
- { NET_IPV6_MAX_DESYNC_FACTOR, "max_desync_factor" },
- { NET_IPV6_MAX_ADDRESSES, "max_addresses" },
- { NET_IPV6_FORCE_MLD_VERSION, "force_mld_version" },
- { NET_IPV6_ACCEPT_RA_DEFRTR, "accept_ra_defrtr" },
- { NET_IPV6_ACCEPT_RA_PINFO, "accept_ra_pinfo" },
- { NET_IPV6_ACCEPT_RA_RTR_PREF, "accept_ra_rtr_pref" },
- { NET_IPV6_RTR_PROBE_INTERVAL, "router_probe_interval" },
- { NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" },
- { NET_IPV6_PROXY_NDP, "proxy_ndp" },
- { NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv6_conf_table[] = {
- { NET_PROTO_CONF_ALL, "all", trans_net_ipv6_conf_var_table },
- { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv6_conf_var_table },
- { 0, NULL, trans_net_ipv6_conf_var_table },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv6_route_table[] = {
- { NET_IPV6_ROUTE_FLUSH, "flush" },
- { NET_IPV6_ROUTE_GC_THRESH, "gc_thresh" },
- { NET_IPV6_ROUTE_MAX_SIZE, "max_size" },
- { NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
- { NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout" },
- { NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval" },
- { NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity" },
- { NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires" },
- { NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss" },
- { NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv6_icmp_table[] = {
- { NET_IPV6_ICMP_RATELIMIT, "ratelimit" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_ipv6_table[] = {
- { NET_IPV6_CONF, "conf", trans_net_ipv6_conf_table },
- { NET_IPV6_NEIGH, "neigh", trans_net_neigh_table },
- { NET_IPV6_ROUTE, "route", trans_net_ipv6_route_table },
- { NET_IPV6_ICMP, "icmp", trans_net_ipv6_icmp_table },
- { NET_IPV6_BINDV6ONLY, "bindv6only" },
- { NET_IPV6_IP6FRAG_HIGH_THRESH, "ip6frag_high_thresh" },
- { NET_IPV6_IP6FRAG_LOW_THRESH, "ip6frag_low_thresh" },
- { NET_IPV6_IP6FRAG_TIME, "ip6frag_time" },
- { NET_IPV6_IP6FRAG_SECRET_INTERVAL, "ip6frag_secret_interval" },
- { NET_IPV6_MLD_MAX_MSF, "mld_max_msf" },
- { 2088 /* IPQ_QMAX */, "ip6_queue_maxlen" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_x25_table[] = {
- { NET_X25_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" },
- { NET_X25_CALL_REQUEST_TIMEOUT, "call_request_timeout" },
- { NET_X25_RESET_REQUEST_TIMEOUT, "reset_request_timeout" },
- { NET_X25_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" },
- { NET_X25_ACK_HOLD_BACK_TIMEOUT, "acknowledgement_hold_back_timeout" },
- { NET_X25_FORWARD, "x25_forward" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_tr_table[] = {
- { NET_TR_RIF_TIMEOUT, "rif_timeout" },
- {}
-};
-
-
-static const struct trans_ctl_table trans_net_decnet_conf_vars[] = {
- { NET_DECNET_CONF_DEV_FORWARDING, "forwarding" },
- { NET_DECNET_CONF_DEV_PRIORITY, "priority" },
- { NET_DECNET_CONF_DEV_T2, "t2" },
- { NET_DECNET_CONF_DEV_T3, "t3" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_decnet_conf[] = {
- { 0, NULL, trans_net_decnet_conf_vars },
- {}
-};
-
-static const struct trans_ctl_table trans_net_decnet_table[] = {
- { NET_DECNET_CONF, "conf", trans_net_decnet_conf },
- { NET_DECNET_NODE_ADDRESS, "node_address" },
- { NET_DECNET_NODE_NAME, "node_name" },
- { NET_DECNET_DEFAULT_DEVICE, "default_device" },
- { NET_DECNET_TIME_WAIT, "time_wait" },
- { NET_DECNET_DN_COUNT, "dn_count" },
- { NET_DECNET_DI_COUNT, "di_count" },
- { NET_DECNET_DR_COUNT, "dr_count" },
- { NET_DECNET_DST_GC_INTERVAL, "dst_gc_interval" },
- { NET_DECNET_NO_FC_MAX_CWND, "no_fc_max_cwnd" },
- { NET_DECNET_MEM, "decnet_mem" },
- { NET_DECNET_RMEM, "decnet_rmem" },
- { NET_DECNET_WMEM, "decnet_wmem" },
- { NET_DECNET_DEBUG_LEVEL, "debug" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_sctp_table[] = {
- { NET_SCTP_RTO_INITIAL, "rto_initial" },
- { NET_SCTP_RTO_MIN, "rto_min" },
- { NET_SCTP_RTO_MAX, "rto_max" },
- { NET_SCTP_RTO_ALPHA, "rto_alpha_exp_divisor" },
- { NET_SCTP_RTO_BETA, "rto_beta_exp_divisor" },
- { NET_SCTP_VALID_COOKIE_LIFE, "valid_cookie_life" },
- { NET_SCTP_ASSOCIATION_MAX_RETRANS, "association_max_retrans" },
- { NET_SCTP_PATH_MAX_RETRANS, "path_max_retrans" },
- { NET_SCTP_MAX_INIT_RETRANSMITS, "max_init_retransmits" },
- { NET_SCTP_HB_INTERVAL, "hb_interval" },
- { NET_SCTP_PRESERVE_ENABLE, "cookie_preserve_enable" },
- { NET_SCTP_MAX_BURST, "max_burst" },
- { NET_SCTP_ADDIP_ENABLE, "addip_enable" },
- { NET_SCTP_PRSCTP_ENABLE, "prsctp_enable" },
- { NET_SCTP_SNDBUF_POLICY, "sndbuf_policy" },
- { NET_SCTP_SACK_TIMEOUT, "sack_timeout" },
- { NET_SCTP_RCVBUF_POLICY, "rcvbuf_policy" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_llc_llc2_timeout_table[] = {
- { NET_LLC2_ACK_TIMEOUT, "ack" },
- { NET_LLC2_P_TIMEOUT, "p" },
- { NET_LLC2_REJ_TIMEOUT, "rej" },
- { NET_LLC2_BUSY_TIMEOUT, "busy" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_llc_station_table[] = {
- { NET_LLC_STATION_ACK_TIMEOUT, "ack_timeout" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_llc_llc2_table[] = {
- { NET_LLC2, "timeout", trans_net_llc_llc2_timeout_table },
- {}
-};
-
-static const struct trans_ctl_table trans_net_llc_table[] = {
- { NET_LLC2, "llc2", trans_net_llc_llc2_table },
- { NET_LLC_STATION, "station", trans_net_llc_station_table },
- {}
-};
-
-static const struct trans_ctl_table trans_net_netfilter_table[] = {
- { NET_NF_CONNTRACK_MAX, "nf_conntrack_max" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "nf_conntrack_tcp_timeout_syn_sent" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, "nf_conntrack_tcp_timeout_syn_recv" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, "nf_conntrack_tcp_timeout_established" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, "nf_conntrack_tcp_timeout_fin_wait" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, "nf_conntrack_tcp_timeout_close_wait" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, "nf_conntrack_tcp_timeout_last_ack" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, "nf_conntrack_tcp_timeout_time_wait" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, "nf_conntrack_tcp_timeout_close" },
- { NET_NF_CONNTRACK_UDP_TIMEOUT, "nf_conntrack_udp_timeout" },
- { NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, "nf_conntrack_udp_timeout_stream" },
- { NET_NF_CONNTRACK_ICMP_TIMEOUT, "nf_conntrack_icmp_timeout" },
- { NET_NF_CONNTRACK_GENERIC_TIMEOUT, "nf_conntrack_generic_timeout" },
- { NET_NF_CONNTRACK_BUCKETS, "nf_conntrack_buckets" },
- { NET_NF_CONNTRACK_LOG_INVALID, "nf_conntrack_log_invalid" },
- { NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, "nf_conntrack_tcp_timeout_max_retrans" },
- { NET_NF_CONNTRACK_TCP_LOOSE, "nf_conntrack_tcp_loose" },
- { NET_NF_CONNTRACK_TCP_BE_LIBERAL, "nf_conntrack_tcp_be_liberal" },
- { NET_NF_CONNTRACK_TCP_MAX_RETRANS, "nf_conntrack_tcp_max_retrans" },
- { NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, "nf_conntrack_sctp_timeout_closed" },
- { NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, "nf_conntrack_sctp_timeout_cookie_wait" },
- { NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, "nf_conntrack_sctp_timeout_cookie_echoed" },
- { NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, "nf_conntrack_sctp_timeout_established" },
- { NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, "nf_conntrack_sctp_timeout_shutdown_sent" },
- { NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, "nf_conntrack_sctp_timeout_shutdown_recd" },
- { NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, "nf_conntrack_sctp_timeout_shutdown_ack_sent" },
- { NET_NF_CONNTRACK_COUNT, "nf_conntrack_count" },
- { NET_NF_CONNTRACK_ICMPV6_TIMEOUT, "nf_conntrack_icmpv6_timeout" },
- { NET_NF_CONNTRACK_FRAG6_TIMEOUT, "nf_conntrack_frag6_timeout" },
- { NET_NF_CONNTRACK_FRAG6_LOW_THRESH, "nf_conntrack_frag6_low_thresh" },
- { NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, "nf_conntrack_frag6_high_thresh" },
- { NET_NF_CONNTRACK_CHECKSUM, "nf_conntrack_checksum" },
-
- {}
-};
-
-static const struct trans_ctl_table trans_net_dccp_table[] = {
- { NET_DCCP_DEFAULT, "default" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_irda_table[] = {
- { NET_IRDA_DISCOVERY, "discovery" },
- { NET_IRDA_DEVNAME, "devname" },
- { NET_IRDA_DEBUG, "debug" },
- { NET_IRDA_FAST_POLL, "fast_poll_increase" },
- { NET_IRDA_DISCOVERY_SLOTS, "discovery_slots" },
- { NET_IRDA_DISCOVERY_TIMEOUT, "discovery_timeout" },
- { NET_IRDA_SLOT_TIMEOUT, "slot_timeout" },
- { NET_IRDA_MAX_BAUD_RATE, "max_baud_rate" },
- { NET_IRDA_MIN_TX_TURN_TIME, "min_tx_turn_time" },
- { NET_IRDA_MAX_TX_DATA_SIZE, "max_tx_data_size" },
- { NET_IRDA_MAX_TX_WINDOW, "max_tx_window" },
- { NET_IRDA_MAX_NOREPLY_TIME, "max_noreply_time" },
- { NET_IRDA_WARN_NOREPLY_TIME, "warn_noreply_time" },
- { NET_IRDA_LAP_KEEPALIVE_TIME, "lap_keepalive_time" },
- {}
-};
-
-static const struct trans_ctl_table trans_net_table[] = {
- { NET_CORE, "core", trans_net_core_table },
- /* NET_ETHER not used */
- /* NET_802 not used */
- { NET_UNIX, "unix", trans_net_unix_table },
- { NET_IPV4, "ipv4", trans_net_ipv4_table },
- { NET_IPX, "ipx", trans_net_ipx_table },
- { NET_ATALK, "appletalk", trans_net_atalk_table },
- { NET_NETROM, "netrom", trans_net_netrom_table },
- { NET_AX25, "ax25", trans_net_ax25_table },
- { NET_BRIDGE, "bridge", trans_net_bridge_table },
- { NET_ROSE, "rose", trans_net_rose_table },
- { NET_IPV6, "ipv6", trans_net_ipv6_table },
- { NET_X25, "x25", trans_net_x25_table },
- { NET_TR, "token-ring", trans_net_tr_table },
- { NET_DECNET, "decnet", trans_net_decnet_table },
- /* NET_ECONET not used */
- { NET_SCTP, "sctp", trans_net_sctp_table },
- { NET_LLC, "llc", trans_net_llc_table },
- { NET_NETFILTER, "netfilter", trans_net_netfilter_table },
- { NET_DCCP, "dccp", trans_net_dccp_table },
- { NET_IRDA, "irda", trans_net_irda_table },
- { 2089, "nf_conntrack_max" },
- {}
-};
-
-static const struct trans_ctl_table trans_fs_quota_table[] = {
- { FS_DQ_LOOKUPS, "lookups" },
- { FS_DQ_DROPS, "drops" },
- { FS_DQ_READS, "reads" },
- { FS_DQ_WRITES, "writes" },
- { FS_DQ_CACHE_HITS, "cache_hits" },
- { FS_DQ_ALLOCATED, "allocated_dquots" },
- { FS_DQ_FREE, "free_dquots" },
- { FS_DQ_SYNCS, "syncs" },
- { FS_DQ_WARNINGS, "warnings" },
- {}
-};
-
-static const struct trans_ctl_table trans_fs_xfs_table[] = {
- { XFS_SGID_INHERIT, "irix_sgid_inherit" },
- { XFS_SYMLINK_MODE, "irix_symlink_mode" },
- { XFS_PANIC_MASK, "panic_mask" },
-
- { XFS_ERRLEVEL, "error_level" },
- { XFS_SYNCD_TIMER, "xfssyncd_centisecs" },
- { XFS_INHERIT_SYNC, "inherit_sync" },
- { XFS_INHERIT_NODUMP, "inherit_nodump" },
- { XFS_INHERIT_NOATIME, "inherit_noatime" },
- { XFS_BUF_TIMER, "xfsbufd_centisecs" },
- { XFS_BUF_AGE, "age_buffer_centisecs" },
- { XFS_INHERIT_NOSYM, "inherit_nosymlinks" },
- { XFS_ROTORSTEP, "rotorstep" },
- { XFS_INHERIT_NODFRG, "inherit_nodefrag" },
- { XFS_FILESTREAM_TIMER, "filestream_centisecs" },
- { XFS_STATS_CLEAR, "stats_clear" },
- {}
-};
-
-static const struct trans_ctl_table trans_fs_ocfs2_nm_table[] = {
- { 1, "hb_ctl_path" },
- {}
-};
-
-static const struct trans_ctl_table trans_fs_ocfs2_table[] = {
- { 1, "nm", trans_fs_ocfs2_nm_table },
- {}
-};
-
-static const struct trans_ctl_table trans_inotify_table[] = {
- { INOTIFY_MAX_USER_INSTANCES, "max_user_instances" },
- { INOTIFY_MAX_USER_WATCHES, "max_user_watches" },
- { INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" },
- {}
-};
-
-static const struct trans_ctl_table trans_fs_table[] = {
- { FS_NRINODE, "inode-nr" },
- { FS_STATINODE, "inode-state" },
- /* FS_MAXINODE unused */
- /* FS_NRDQUOT unused */
- /* FS_MAXDQUOT unused */
- { FS_NRFILE, "file-nr" },
- { FS_MAXFILE, "file-max" },
- { FS_DENTRY, "dentry-state" },
- /* FS_NRSUPER unused */
- /* FS_MAXUPSER unused */
- { FS_OVERFLOWUID, "overflowuid" },
- { FS_OVERFLOWGID, "overflowgid" },
- { FS_LEASES, "leases-enable" },
- { FS_DIR_NOTIFY, "dir-notify-enable" },
- { FS_LEASE_TIME, "lease-break-time" },
- { FS_DQSTATS, "quota", trans_fs_quota_table },
- { FS_XFS, "xfs", trans_fs_xfs_table },
- { FS_AIO_NR, "aio-nr" },
- { FS_AIO_MAX_NR, "aio-max-nr" },
- { FS_INOTIFY, "inotify", trans_inotify_table },
- { FS_OCFS2, "ocfs2", trans_fs_ocfs2_table },
- { KERN_SETUID_DUMPABLE, "suid_dumpable" },
- {}
-};
-
-static const struct trans_ctl_table trans_debug_table[] = {
- {}
-};
-
-static const struct trans_ctl_table trans_cdrom_table[] = {
- { DEV_CDROM_INFO, "info" },
- { DEV_CDROM_AUTOCLOSE, "autoclose" },
- { DEV_CDROM_AUTOEJECT, "autoeject" },
- { DEV_CDROM_DEBUG, "debug" },
- { DEV_CDROM_LOCK, "lock" },
- { DEV_CDROM_CHECK_MEDIA, "check_media" },
- {}
-};
-
-static const struct trans_ctl_table trans_ipmi_table[] = {
- { DEV_IPMI_POWEROFF_POWERCYCLE, "poweroff_powercycle" },
- {}
-};
-
-static const struct trans_ctl_table trans_mac_hid_files[] = {
- /* DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES unused */
- /* DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES unused */
- { DEV_MAC_HID_MOUSE_BUTTON_EMULATION, "mouse_button_emulation" },
- { DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE, "mouse_button2_keycode" },
- { DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE, "mouse_button3_keycode" },
- /* DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES unused */
- {}
-};
-
-static const struct trans_ctl_table trans_raid_table[] = {
- { DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min" },
- { DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max" },
- {}
-};
-
-static const struct trans_ctl_table trans_scsi_table[] = {
- { DEV_SCSI_LOGGING_LEVEL, "logging_level" },
- {}
-};
-
-static const struct trans_ctl_table trans_parport_default_table[] = {
- { DEV_PARPORT_DEFAULT_TIMESLICE, "timeslice" },
- { DEV_PARPORT_DEFAULT_SPINTIME, "spintime" },
- {}
-};
-
-static const struct trans_ctl_table trans_parport_device_table[] = {
- { DEV_PARPORT_DEVICE_TIMESLICE, "timeslice" },
- {}
-};
-
-static const struct trans_ctl_table trans_parport_devices_table[] = {
- { DEV_PARPORT_DEVICES_ACTIVE, "active" },
- { 0, NULL, trans_parport_device_table },
- {}
-};
-
-static const struct trans_ctl_table trans_parport_parport_table[] = {
- { DEV_PARPORT_SPINTIME, "spintime" },
- { DEV_PARPORT_BASE_ADDR, "base-addr" },
- { DEV_PARPORT_IRQ, "irq" },
- { DEV_PARPORT_DMA, "dma" },
- { DEV_PARPORT_MODES, "modes" },
- { DEV_PARPORT_DEVICES, "devices", trans_parport_devices_table },
- { DEV_PARPORT_AUTOPROBE, "autoprobe" },
- { DEV_PARPORT_AUTOPROBE + 1, "autoprobe0" },
- { DEV_PARPORT_AUTOPROBE + 2, "autoprobe1" },
- { DEV_PARPORT_AUTOPROBE + 3, "autoprobe2" },
- { DEV_PARPORT_AUTOPROBE + 4, "autoprobe3" },
- {}
-};
-static const struct trans_ctl_table trans_parport_table[] = {
- { DEV_PARPORT_DEFAULT, "default", trans_parport_default_table },
- { 0, NULL, trans_parport_parport_table },
- {}
-};
-
-static const struct trans_ctl_table trans_dev_table[] = {
- { DEV_CDROM, "cdrom", trans_cdrom_table },
- /* DEV_HWMON unused */
- { DEV_PARPORT, "parport", trans_parport_table },
- { DEV_RAID, "raid", trans_raid_table },
- { DEV_MAC_HID, "mac_hid", trans_mac_hid_files },
- { DEV_SCSI, "scsi", trans_scsi_table },
- { DEV_IPMI, "ipmi", trans_ipmi_table },
- {}
-};
-
-static const struct trans_ctl_table trans_bus_isa_table[] = {
- { BUS_ISA_MEM_BASE, "membase" },
- { BUS_ISA_PORT_BASE, "portbase" },
- { BUS_ISA_PORT_SHIFT, "portshift" },
- {}
-};
-
-static const struct trans_ctl_table trans_bus_table[] = {
- { CTL_BUS_ISA, "isa", trans_bus_isa_table },
- {}
-};
-
-static const struct trans_ctl_table trans_arlan_conf_table0[] = {
- { 1, "spreadingCode" },
- { 2, "channelNumber" },
- { 3, "scramblingDisable" },
- { 4, "txAttenuation" },
- { 5, "systemId" },
- { 6, "maxDatagramSize" },
- { 7, "maxFrameSize" },
- { 8, "maxRetries" },
- { 9, "receiveMode" },
- { 10, "priority" },
- { 11, "rootOrRepeater" },
- { 12, "SID" },
- { 13, "registrationMode" },
- { 14, "registrationFill" },
- { 15, "localTalkAddress" },
- { 16, "codeFormat" },
- { 17, "numChannels" },
- { 18, "channel1" },
- { 19, "channel2" },
- { 20, "channel3" },
- { 21, "channel4" },
- { 22, "txClear" },
- { 23, "txRetries" },
- { 24, "txRouting" },
- { 25, "txScrambled" },
- { 26, "rxParameter" },
- { 27, "txTimeoutMs" },
- { 28, "waitCardTimeout" },
- { 29, "channelSet" },
- { 30, "name" },
- { 31, "waitTime" },
- { 32, "lParameter" },
- { 33, "_15" },
- { 34, "headerSize" },
- { 36, "tx_delay_ms" },
- { 37, "retries" },
- { 38, "ReTransmitPacketMaxSize" },
- { 39, "waitReTransmitPacketMaxSize" },
- { 40, "fastReTransCount" },
- { 41, "driverRetransmissions" },
- { 42, "txAckTimeoutMs" },
- { 43, "registrationInterrupts" },
- { 44, "hardwareType" },
- { 45, "radioType" },
- { 46, "writeEEPROM" },
- { 47, "writeRadioType" },
- { 48, "entry_exit_debug" },
- { 49, "debug" },
- { 50, "in_speed" },
- { 51, "out_speed" },
- { 52, "in_speed10" },
- { 53, "out_speed10" },
- { 54, "in_speed_max" },
- { 55, "out_speed_max" },
- { 56, "measure_rate" },
- { 57, "pre_Command_Wait" },
- { 58, "rx_tweak1" },
- { 59, "rx_tweak2" },
- { 60, "tx_queue_len" },
-
- { 150, "arlan0-txRing" },
- { 151, "arlan0-rxRing" },
- { 152, "arlan0-18" },
- { 153, "arlan0-ring" },
- { 154, "arlan0-shm-cpy" },
- { 155, "config0" },
- { 156, "reset0" },
- {}
-};
-
-static const struct trans_ctl_table trans_arlan_conf_table1[] = {
- { 1, "spreadingCode" },
- { 2, "channelNumber" },
- { 3, "scramblingDisable" },
- { 4, "txAttenuation" },
- { 5, "systemId" },
- { 6, "maxDatagramSize" },
- { 7, "maxFrameSize" },
- { 8, "maxRetries" },
- { 9, "receiveMode" },
- { 10, "priority" },
- { 11, "rootOrRepeater" },
- { 12, "SID" },
- { 13, "registrationMode" },
- { 14, "registrationFill" },
- { 15, "localTalkAddress" },
- { 16, "codeFormat" },
- { 17, "numChannels" },
- { 18, "channel1" },
- { 19, "channel2" },
- { 20, "channel3" },
- { 21, "channel4" },
- { 22, "txClear" },
- { 23, "txRetries" },
- { 24, "txRouting" },
- { 25, "txScrambled" },
- { 26, "rxParameter" },
- { 27, "txTimeoutMs" },
- { 28, "waitCardTimeout" },
- { 29, "channelSet" },
- { 30, "name" },
- { 31, "waitTime" },
- { 32, "lParameter" },
- { 33, "_15" },
- { 34, "headerSize" },
- { 36, "tx_delay_ms" },
- { 37, "retries" },
- { 38, "ReTransmitPacketMaxSize" },
- { 39, "waitReTransmitPacketMaxSize" },
- { 40, "fastReTransCount" },
- { 41, "driverRetransmissions" },
- { 42, "txAckTimeoutMs" },
- { 43, "registrationInterrupts" },
- { 44, "hardwareType" },
- { 45, "radioType" },
- { 46, "writeEEPROM" },
- { 47, "writeRadioType" },
- { 48, "entry_exit_debug" },
- { 49, "debug" },
- { 50, "in_speed" },
- { 51, "out_speed" },
- { 52, "in_speed10" },
- { 53, "out_speed10" },
- { 54, "in_speed_max" },
- { 55, "out_speed_max" },
- { 56, "measure_rate" },
- { 57, "pre_Command_Wait" },
- { 58, "rx_tweak1" },
- { 59, "rx_tweak2" },
- { 60, "tx_queue_len" },
-
- { 150, "arlan1-txRing" },
- { 151, "arlan1-rxRing" },
- { 152, "arlan1-18" },
- { 153, "arlan1-ring" },
- { 154, "arlan1-shm-cpy" },
- { 155, "config1" },
- { 156, "reset1" },
- {}
-};
-
-static const struct trans_ctl_table trans_arlan_conf_table2[] = {
- { 1, "spreadingCode" },
- { 2, "channelNumber" },
- { 3, "scramblingDisable" },
- { 4, "txAttenuation" },
- { 5, "systemId" },
- { 6, "maxDatagramSize" },
- { 7, "maxFrameSize" },
- { 8, "maxRetries" },
- { 9, "receiveMode" },
- { 10, "priority" },
- { 11, "rootOrRepeater" },
- { 12, "SID" },
- { 13, "registrationMode" },
- { 14, "registrationFill" },
- { 15, "localTalkAddress" },
- { 16, "codeFormat" },
- { 17, "numChannels" },
- { 18, "channel1" },
- { 19, "channel2" },
- { 20, "channel3" },
- { 21, "channel4" },
- { 22, "txClear" },
- { 23, "txRetries" },
- { 24, "txRouting" },
- { 25, "txScrambled" },
- { 26, "rxParameter" },
- { 27, "txTimeoutMs" },
- { 28, "waitCardTimeout" },
- { 29, "channelSet" },
- { 30, "name" },
- { 31, "waitTime" },
- { 32, "lParameter" },
- { 33, "_15" },
- { 34, "headerSize" },
- { 36, "tx_delay_ms" },
- { 37, "retries" },
- { 38, "ReTransmitPacketMaxSize" },
- { 39, "waitReTransmitPacketMaxSize" },
- { 40, "fastReTransCount" },
- { 41, "driverRetransmissions" },
- { 42, "txAckTimeoutMs" },
- { 43, "registrationInterrupts" },
- { 44, "hardwareType" },
- { 45, "radioType" },
- { 46, "writeEEPROM" },
- { 47, "writeRadioType" },
- { 48, "entry_exit_debug" },
- { 49, "debug" },
- { 50, "in_speed" },
- { 51, "out_speed" },
- { 52, "in_speed10" },
- { 53, "out_speed10" },
- { 54, "in_speed_max" },
- { 55, "out_speed_max" },
- { 56, "measure_rate" },
- { 57, "pre_Command_Wait" },
- { 58, "rx_tweak1" },
- { 59, "rx_tweak2" },
- { 60, "tx_queue_len" },
-
- { 150, "arlan2-txRing" },
- { 151, "arlan2-rxRing" },
- { 152, "arlan2-18" },
- { 153, "arlan2-ring" },
- { 154, "arlan2-shm-cpy" },
- { 155, "config2" },
- { 156, "reset2" },
- {}
-};
-
-static const struct trans_ctl_table trans_arlan_conf_table3[] = {
- { 1, "spreadingCode" },
- { 2, "channelNumber" },
- { 3, "scramblingDisable" },
- { 4, "txAttenuation" },
- { 5, "systemId" },
- { 6, "maxDatagramSize" },
- { 7, "maxFrameSize" },
- { 8, "maxRetries" },
- { 9, "receiveMode" },
- { 10, "priority" },
- { 11, "rootOrRepeater" },
- { 12, "SID" },
- { 13, "registrationMode" },
- { 14, "registrationFill" },
- { 15, "localTalkAddress" },
- { 16, "codeFormat" },
- { 17, "numChannels" },
- { 18, "channel1" },
- { 19, "channel2" },
- { 20, "channel3" },
- { 21, "channel4" },
- { 22, "txClear" },
- { 23, "txRetries" },
- { 24, "txRouting" },
- { 25, "txScrambled" },
- { 26, "rxParameter" },
- { 27, "txTimeoutMs" },
- { 28, "waitCardTimeout" },
- { 29, "channelSet" },
- { 30, "name" },
- { 31, "waitTime" },
- { 32, "lParameter" },
- { 33, "_15" },
- { 34, "headerSize" },
- { 36, "tx_delay_ms" },
- { 37, "retries" },
- { 38, "ReTransmitPacketMaxSize" },
- { 39, "waitReTransmitPacketMaxSize" },
- { 40, "fastReTransCount" },
- { 41, "driverRetransmissions" },
- { 42, "txAckTimeoutMs" },
- { 43, "registrationInterrupts" },
- { 44, "hardwareType" },
- { 45, "radioType" },
- { 46, "writeEEPROM" },
- { 47, "writeRadioType" },
- { 48, "entry_exit_debug" },
- { 49, "debug" },
- { 50, "in_speed" },
- { 51, "out_speed" },
- { 52, "in_speed10" },
- { 53, "out_speed10" },
- { 54, "in_speed_max" },
- { 55, "out_speed_max" },
- { 56, "measure_rate" },
- { 57, "pre_Command_Wait" },
- { 58, "rx_tweak1" },
- { 59, "rx_tweak2" },
- { 60, "tx_queue_len" },
-
- { 150, "arlan3-txRing" },
- { 151, "arlan3-rxRing" },
- { 152, "arlan3-18" },
- { 153, "arlan3-ring" },
- { 154, "arlan3-shm-cpy" },
- { 155, "config3" },
- { 156, "reset3" },
- {}
-};
-
-static const struct trans_ctl_table trans_arlan_table[] = {
- { 1, "arlan0", trans_arlan_conf_table0 },
- { 2, "arlan1", trans_arlan_conf_table1 },
- { 3, "arlan2", trans_arlan_conf_table2 },
- { 4, "arlan3", trans_arlan_conf_table3 },
- {}
-};
-
-static const struct trans_ctl_table trans_s390dbf_table[] = {
- { 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" },
- { 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" },
- {}
-};
-
-static const struct trans_ctl_table trans_sunrpc_table[] = {
- { CTL_RPCDEBUG, "rpc_debug" },
- { CTL_NFSDEBUG, "nfs_debug" },
- { CTL_NFSDDEBUG, "nfsd_debug" },
- { CTL_NLMDEBUG, "nlm_debug" },
- { CTL_SLOTTABLE_UDP, "udp_slot_table_entries" },
- { CTL_SLOTTABLE_TCP, "tcp_slot_table_entries" },
- { CTL_MIN_RESVPORT, "min_resvport" },
- { CTL_MAX_RESVPORT, "max_resvport" },
- {}
-};
-
-static const struct trans_ctl_table trans_pm_table[] = {
- { 1 /* CTL_PM_SUSPEND */, "suspend" },
- { 2 /* CTL_PM_CMODE */, "cmode" },
- { 3 /* CTL_PM_P0 */, "p0" },
- { 4 /* CTL_PM_CM */, "cm" },
- {}
-};
-
-static const struct trans_ctl_table trans_frv_table[] = {
- { 1, "cache-mode" },
- { 2, "pin-cxnr" },
- {}
-};
-
-static const struct trans_ctl_table trans_root_table[] = {
- { CTL_KERN, "kernel", trans_kern_table },
- { CTL_VM, "vm", trans_vm_table },
- { CTL_NET, "net", trans_net_table },
- /* CTL_PROC not used */
- { CTL_FS, "fs", trans_fs_table },
- { CTL_DEBUG, "debug", trans_debug_table },
- { CTL_DEV, "dev", trans_dev_table },
- { CTL_BUS, "bus", trans_bus_table },
- { CTL_ABI, "abi" },
- /* CTL_CPU not used */
- { CTL_ARLAN, "arlan", trans_arlan_table },
- { CTL_S390DBF, "s390dbf", trans_s390dbf_table },
- { CTL_SUNRPC, "sunrpc", trans_sunrpc_table },
- { CTL_PM, "pm", trans_pm_table },
- { CTL_FRV, "frv", trans_frv_table },
- {}
-};
-
-
-
static int sysctl_depth(struct ctl_table *table)
{
@@ -1261,47 +28,6 @@ static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
return table;
}
-static const struct trans_ctl_table *sysctl_binary_lookup(struct ctl_table *table)
-{
- struct ctl_table *test;
- const struct trans_ctl_table *ref;
- int cur_depth;
-
- cur_depth = sysctl_depth(table);
-
- ref = trans_root_table;
-repeat:
- test = sysctl_parent(table, cur_depth);
- for (; ref->ctl_name || ref->procname || ref->child; ref++) {
- int match = 0;
-
- if (cur_depth && !ref->child)
- continue;
-
- if (test->procname && ref->procname &&
- (strcmp(test->procname, ref->procname) == 0))
- match++;
-
- if (test->ctl_name && ref->ctl_name &&
- (test->ctl_name == ref->ctl_name))
- match++;
-
- if (!ref->ctl_name && !ref->procname)
- match++;
-
- if (match) {
- if (cur_depth != 0) {
- cur_depth--;
- ref = ref->child;
- goto repeat;
- }
- goto out;
- }
- }
- ref = NULL;
-out:
- return ref;
-}
static void sysctl_print_path(struct ctl_table *table)
{
@@ -1315,26 +41,6 @@ static void sysctl_print_path(struct ctl_table *table)
}
}
printk(" ");
- if (table->ctl_name) {
- for (i = depth; i >= 0; i--) {
- tmp = sysctl_parent(table, i);
- printk(".%d", tmp->ctl_name);
- }
- }
-}
-
-static void sysctl_repair_table(struct ctl_table *table)
-{
- /* Don't complain about the classic default
- * sysctl strategy routine. Maybe later we
- * can get the tables fixed and complain about
- * this.
- */
- if (table->ctl_name && table->procname &&
- (table->proc_handler == proc_dointvec) &&
- (!table->strategy)) {
- table->strategy = sysctl_data;
- }
}
static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
@@ -1352,7 +58,7 @@ static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
ref = head->ctl_table;
repeat:
test = sysctl_parent(table, cur_depth);
- for (; ref->ctl_name || ref->procname; ref++) {
+ for (; ref->procname; ref++) {
int match = 0;
if (cur_depth && !ref->child)
continue;
@@ -1361,10 +67,6 @@ repeat:
(strcmp(test->procname, ref->procname) == 0))
match++;
- if (test->ctl_name && ref->ctl_name &&
- (test->ctl_name == ref->ctl_name))
- match++;
-
if (match) {
if (cur_depth != 0) {
cur_depth--;
@@ -1392,38 +94,6 @@ static void set_fail(const char **fail, struct ctl_table *table, const char *str
*fail = str;
}
-static int sysctl_check_dir(struct nsproxy *namespaces,
- struct ctl_table *table)
-{
- struct ctl_table *ref;
- int error;
-
- error = 0;
- ref = sysctl_check_lookup(namespaces, table);
- if (ref) {
- int match = 0;
- if ((!table->procname && !ref->procname) ||
- (table->procname && ref->procname &&
- (strcmp(table->procname, ref->procname) == 0)))
- match++;
-
- if ((!table->ctl_name && !ref->ctl_name) ||
- (table->ctl_name && ref->ctl_name &&
- (table->ctl_name == ref->ctl_name)))
- match++;
-
- if (match != 2) {
- printk(KERN_ERR "%s: failed: ", __func__);
- sysctl_print_path(table);
- printk(" ref: ");
- sysctl_print_path(ref);
- printk("\n");
- error = -EINVAL;
- }
- }
- return error;
-}
-
static void sysctl_check_leaf(struct nsproxy *namespaces,
struct ctl_table *table, const char **fail)
{
@@ -1434,37 +104,15 @@ static void sysctl_check_leaf(struct nsproxy *namespaces,
set_fail(fail, table, "Sysctl already exists");
}
-static void sysctl_check_bin_path(struct ctl_table *table, const char **fail)
-{
- const struct trans_ctl_table *ref;
-
- ref = sysctl_binary_lookup(table);
- if (table->ctl_name && !ref)
- set_fail(fail, table, "Unknown sysctl binary path");
- if (ref) {
- if (ref->procname &&
- (!table->procname ||
- (strcmp(table->procname, ref->procname) != 0)))
- set_fail(fail, table, "procname does not match binary path procname");
-
- if (ref->ctl_name && table->ctl_name &&
- (table->ctl_name != ref->ctl_name))
- set_fail(fail, table, "ctl_name does not match binary path ctl_name");
- }
-}
-
int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
{
int error = 0;
- for (; table->ctl_name || table->procname; table++) {
+ for (; table->procname; table++) {
const char *fail = NULL;
- sysctl_repair_table(table);
if (table->parent) {
if (table->procname && !table->parent->procname)
set_fail(&fail, table, "Parent without procname");
- if (table->ctl_name && !table->parent->ctl_name)
- set_fail(&fail, table, "Parent without ctl_name");
}
if (!table->procname)
set_fail(&fail, table, "No procname");
@@ -1477,21 +125,12 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
set_fail(&fail, table, "Writable sysctl directory");
if (table->proc_handler)
set_fail(&fail, table, "Directory with proc_handler");
- if (table->strategy)
- set_fail(&fail, table, "Directory with strategy");
if (table->extra1)
set_fail(&fail, table, "Directory with extra1");
if (table->extra2)
set_fail(&fail, table, "Directory with extra2");
- if (sysctl_check_dir(namespaces, table))
- set_fail(&fail, table, "Inconsistent directory names");
} else {
- if ((table->strategy == sysctl_data) ||
- (table->strategy == sysctl_string) ||
- (table->strategy == sysctl_intvec) ||
- (table->strategy == sysctl_jiffies) ||
- (table->strategy == sysctl_ms_jiffies) ||
- (table->proc_handler == proc_dostring) ||
+ if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
(table->proc_handler == proc_dointvec_minmax) ||
(table->proc_handler == proc_dointvec_jiffies) ||
@@ -1513,14 +152,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
set_fail(&fail, table, "No max");
}
}
-#ifdef CONFIG_SYSCTL_SYSCALL
- if (table->ctl_name && !table->strategy)
- set_fail(&fail, table, "Missing strategy");
-#endif
-#if 0
- if (!table->ctl_name && table->strategy)
- set_fail(&fail, table, "Strategy without ctl_name");
-#endif
#ifdef CONFIG_PROC_SYSCTL
if (table->procname && !table->proc_handler)
set_fail(&fail, table, "No proc_handler");
@@ -1531,7 +162,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
#endif
sysctl_check_leaf(namespaces, table, &fail);
}
- sysctl_check_bin_path(table, &fail);
if (table->mode > 0777)
set_fail(&fail, table, "bogus .mode");
if (fail) {
diff --git a/kernel/time.c b/kernel/time.c
index 804798005d19..c6324d96009e 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -136,7 +136,6 @@ static inline void warp_clock(void)
write_seqlock_irq(&xtime_lock);
wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
xtime.tv_sec += sys_tz.tz_minuteswest * 60;
- update_xtime_cache(0);
write_sequnlock_irq(&xtime_lock);
clock_was_set();
}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 620b58abdc32..6f740d9f0948 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -20,6 +20,8 @@
#include <linux/sysdev.h>
#include <linux/tick.h>
+#include "tick-internal.h"
+
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);
@@ -28,7 +30,7 @@ static LIST_HEAD(clockevents_released);
static RAW_NOTIFIER_HEAD(clockevents_chain);
/* Protection for the above */
-static DEFINE_SPINLOCK(clockevents_lock);
+static DEFINE_RAW_SPINLOCK(clockevents_lock);
/**
* clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
@@ -37,10 +39,9 @@ static DEFINE_SPINLOCK(clockevents_lock);
*
* Math helper, returns latch value converted to nanoseconds (bound checked)
*/
-unsigned long clockevent_delta2ns(unsigned long latch,
- struct clock_event_device *evt)
+u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
{
- u64 clc = ((u64) latch << evt->shift);
+ u64 clc = (u64) latch << evt->shift;
if (unlikely(!evt->mult)) {
evt->mult = 1;
@@ -50,10 +51,10 @@ unsigned long clockevent_delta2ns(unsigned long latch,
do_div(clc, evt->mult);
if (clc < 1000)
clc = 1000;
- if (clc > LONG_MAX)
- clc = LONG_MAX;
+ if (clc > KTIME_MAX)
+ clc = KTIME_MAX;
- return (unsigned long) clc;
+ return clc;
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
@@ -140,9 +141,9 @@ int clockevents_register_notifier(struct notifier_block *nb)
unsigned long flags;
int ret;
- spin_lock_irqsave(&clockevents_lock, flags);
+ raw_spin_lock_irqsave(&clockevents_lock, flags);
ret = raw_notifier_chain_register(&clockevents_chain, nb);
- spin_unlock_irqrestore(&clockevents_lock, flags);
+ raw_spin_unlock_irqrestore(&clockevents_lock, flags);
return ret;
}
@@ -184,13 +185,13 @@ void clockevents_register_device(struct clock_event_device *dev)
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(!dev->cpumask);
- spin_lock_irqsave(&clockevents_lock, flags);
+ raw_spin_lock_irqsave(&clockevents_lock, flags);
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
clockevents_notify_released();
- spin_unlock_irqrestore(&clockevents_lock, flags);
+ raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_register_device);
@@ -237,10 +238,11 @@ void clockevents_exchange_device(struct clock_event_device *old,
*/
void clockevents_notify(unsigned long reason, void *arg)
{
- struct list_head *node, *tmp;
+ struct clock_event_device *dev, *tmp;
unsigned long flags;
+ int cpu;
- spin_lock_irqsave(&clockevents_lock, flags);
+ raw_spin_lock_irqsave(&clockevents_lock, flags);
clockevents_do_notify(reason, arg);
switch (reason) {
@@ -249,13 +251,24 @@ void clockevents_notify(unsigned long reason, void *arg)
* Unregister the clock event devices which were
* released from the users in the notify chain.
*/
- list_for_each_safe(node, tmp, &clockevents_released)
- list_del(node);
+ list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+ list_del(&dev->list);
+ /*
+ * Now check whether the CPU has left unused per cpu devices
+ */
+ cpu = *((int *)arg);
+ list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+ if (cpumask_test_cpu(cpu, dev->cpumask) &&
+ cpumask_weight(dev->cpumask) == 1) {
+ BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+ list_del(&dev->list);
+ }
+ }
break;
default:
break;
}
- spin_unlock_irqrestore(&clockevents_lock, flags);
+ raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
#endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 5e18c6ab2c6a..e85c23404d34 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -39,7 +39,7 @@ void timecounter_init(struct timecounter *tc,
tc->cycle_last = cc->read(cc);
tc->nsec = start_tstamp;
}
-EXPORT_SYMBOL(timecounter_init);
+EXPORT_SYMBOL_GPL(timecounter_init);
/**
* timecounter_read_delta - get nanoseconds since last call of this function
@@ -83,7 +83,7 @@ u64 timecounter_read(struct timecounter *tc)
return nsec;
}
-EXPORT_SYMBOL(timecounter_read);
+EXPORT_SYMBOL_GPL(timecounter_read);
u64 timecounter_cyc2time(struct timecounter *tc,
cycle_t cycle_tstamp)
@@ -105,7 +105,60 @@ u64 timecounter_cyc2time(struct timecounter *tc,
return nsec;
}
-EXPORT_SYMBOL(timecounter_cyc2time);
+EXPORT_SYMBOL_GPL(timecounter_cyc2time);
+
+/**
+ * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
+ * @mult: pointer to mult variable
+ * @shift: pointer to shift variable
+ * @from: frequency to convert from
+ * @to: frequency to convert to
+ * @minsec: guaranteed runtime conversion range in seconds
+ *
+ * The function evaluates the shift/mult pair for the scaled math
+ * operations of clocksources and clockevents.
+ *
+ * @to and @from are frequency values in HZ. For clock sources @to is
+ * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
+ * events @to is the counter frequency and @from is NSEC_PER_SEC.
+ *
+ * The @minsec conversion range argument controls the time frame in
+ * seconds which must be covered by the runtime conversion with the
+ * calculated mult and shift factors. This guarantees that no 64bit
+ * overflow happens when the input value of the conversion is
+ * multiplied with the calculated mult factor. Larger ranges may
+ * reduce the conversion accuracy by choosing smaller mult and shift
+ * factors.
+ */
+void
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
+{
+ u64 tmp;
+ u32 sft, sftacc= 32;
+
+ /*
+ * Calculate the shift factor which is limiting the conversion
+ * range: sftacc starts at 32 and loses one for every significant
+ * bit found in the upper 32 bits of @minsec * @from.
+ */
+ tmp = ((u64)minsec * from) >> 32;
+ while (tmp) {
+ tmp >>=1;
+ sftacc--;
+ }
+
+ /*
+ * Find the conversion shift/mult pair which has the best
+ * accuracy and fits the @minsec conversion range: search from
+ * the largest shift downwards and stop at the first candidate
+ * mult which has no bits set at or above bit sftacc.
+ */
+ for (sft = 32; sft > 0; sft--) {
+ tmp = (u64) to << sft;
+ do_div(tmp, from);
+ if ((tmp >> sftacc) == 0)
+ break;
+ }
+ *mult = tmp;
+ *shift = sft;
+}
/*[Clocksource internal variables]---------
* curr_clocksource:
@@ -413,6 +466,47 @@ void clocksource_touch_watchdog(void)
clocksource_resume_watchdog();
}
+/**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs: Pointer to clocksource
+ *
+ * Returns the limit in nanoseconds, already reduced by a 12.5% safety margin.
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+ u64 max_nsecs, max_cycles;
+
+ /*
+ * Calculate the maximum number of cycles that we can pass to the
+ * cyc2ns function without overflowing a 64-bit signed result. The
+ * maximum number of cycles is bounded by (2^63)/cs->mult which
+ * is equivalent to the below.
+ * max_cycles < (2^63)/cs->mult
+ * max_cycles < 2^(log2((2^63)/cs->mult))
+ * max_cycles < 2^(log2(2^63) - log2(cs->mult))
+ * max_cycles < 2^(63 - log2(cs->mult))
+ * max_cycles < 1 << (63 - log2(cs->mult))
+ * Please note that we add 1 to the result of the log2 to account for
+ * any rounding errors, ensure the above inequality is satisfied and
+ * no overflow will occur.
+ */
+ max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+
+ /*
+ * The actual maximum number of cycles we can defer the clocksource is
+ * determined by the minimum of max_cycles and cs->mask.
+ */
+ max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
+ max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+
+ /*
+ * To ensure that the clocksource does not wrap whilst we are idle,
+ * limit the time the clocksource can be deferred by 12.5%. Please
+ * note a margin of 12.5% (1/8, i.e. a right shift by 3) is used because
+ * it can be computed with a shift, versus say 10% which needs division.
+ */
+ return max_nsecs - (max_nsecs >> 3);
+}
+
#ifdef CONFIG_GENERIC_TIME
/**
@@ -511,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs)
*/
int clocksource_register(struct clocksource *cs)
{
+ /* calculate max idle time permitted for this clocksource */
+ cs->max_idle_ns = clocksource_max_deferment(cs);
+
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_select();
@@ -580,7 +677,7 @@ sysfs_show_current_clocksources(struct sys_device *dev,
* @count: length of buffer
*
* Takes input from sysfs interface for manually overriding the default
- * clocksource selction.
+ * clocksource selection.
*/
static ssize_t sysfs_override_clocksource(struct sys_device *dev,
struct sysdev_attribute *attr,
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index c2ec25087a35..b3bafd5fc66d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -31,7 +31,7 @@ static struct tick_device tick_broadcast_device;
/* FIXME: Use cpumask_var_t. */
static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
static DECLARE_BITMAP(tmpmask, NR_CPUS);
-static DEFINE_SPINLOCK(tick_broadcast_lock);
+static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;
#ifdef CONFIG_TICK_ONESHOT
@@ -96,7 +96,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
unsigned long flags;
int ret = 0;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Devices might be registered with both periodic and oneshot
@@ -122,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
tick_broadcast_clear_oneshot(cpu);
}
}
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return ret;
}
@@ -161,13 +161,13 @@ static void tick_do_broadcast(struct cpumask *mask)
*/
static void tick_do_periodic_broadcast(void)
{
- spin_lock(&tick_broadcast_lock);
+ raw_spin_lock(&tick_broadcast_lock);
cpumask_and(to_cpumask(tmpmask),
cpu_online_mask, tick_get_broadcast_mask());
tick_do_broadcast(to_cpumask(tmpmask));
- spin_unlock(&tick_broadcast_lock);
+ raw_spin_unlock(&tick_broadcast_lock);
}
/*
@@ -212,7 +212,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
unsigned long flags;
int cpu, bc_stopped;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
@@ -263,7 +263,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
tick_broadcast_setup_oneshot(bc);
}
out:
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
@@ -299,7 +299,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
unsigned long flags;
unsigned int cpu = *cpup;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
@@ -309,7 +309,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
clockevents_shutdown(bc);
}
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
void tick_suspend_broadcast(void)
@@ -317,13 +317,13 @@ void tick_suspend_broadcast(void)
struct clock_event_device *bc;
unsigned long flags;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
if (bc)
clockevents_shutdown(bc);
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
int tick_resume_broadcast(void)
@@ -332,7 +332,7 @@ int tick_resume_broadcast(void)
unsigned long flags;
int broadcast = 0;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
@@ -351,7 +351,7 @@ int tick_resume_broadcast(void)
break;
}
}
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return broadcast;
}
@@ -405,7 +405,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
ktime_t now, next_event;
int cpu;
- spin_lock(&tick_broadcast_lock);
+ raw_spin_lock(&tick_broadcast_lock);
again:
dev->next_event.tv64 = KTIME_MAX;
next_event.tv64 = KTIME_MAX;
@@ -443,7 +443,7 @@ again:
if (tick_broadcast_set_event(next_event, 0))
goto again;
}
- spin_unlock(&tick_broadcast_lock);
+ raw_spin_unlock(&tick_broadcast_lock);
}
/*
@@ -457,7 +457,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
unsigned long flags;
int cpu;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Periodic mode does not care about the enter/exit of power
@@ -492,7 +492,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
}
out:
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
@@ -563,13 +563,13 @@ void tick_broadcast_switch_to_oneshot(void)
struct clock_event_device *bc;
unsigned long flags;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
bc = tick_broadcast_device.evtdev;
if (bc)
tick_broadcast_setup_oneshot(bc);
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
@@ -581,7 +581,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
unsigned long flags;
unsigned int cpu = *cpup;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Clear the broadcast mask flag for the dead cpu, but do not
@@ -589,7 +589,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
*/
cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 83c4417b6a3c..b6b898d2eeef 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
ktime_t tick_next_period;
ktime_t tick_period;
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
-DEFINE_SPINLOCK(tick_device_lock);
+static DEFINE_RAW_SPINLOCK(tick_device_lock);
/*
* Debugging: see timer_list.c
@@ -209,7 +209,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
int cpu, ret = NOTIFY_OK;
unsigned long flags;
- spin_lock_irqsave(&tick_device_lock, flags);
+ raw_spin_lock_irqsave(&tick_device_lock, flags);
cpu = smp_processor_id();
if (!cpumask_test_cpu(cpu, newdev->cpumask))
@@ -268,7 +268,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
return NOTIFY_STOP;
out_bc:
@@ -278,7 +278,7 @@ out_bc:
if (tick_check_broadcast_device(newdev))
ret = NOTIFY_STOP;
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
return ret;
}
@@ -311,7 +311,7 @@ static void tick_shutdown(unsigned int *cpup)
struct clock_event_device *dev = td->evtdev;
unsigned long flags;
- spin_lock_irqsave(&tick_device_lock, flags);
+ raw_spin_lock_irqsave(&tick_device_lock, flags);
td->mode = TICKDEV_MODE_PERIODIC;
if (dev) {
/*
@@ -322,7 +322,7 @@ static void tick_shutdown(unsigned int *cpup)
clockevents_exchange_device(dev, NULL);
td->evtdev = NULL;
}
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
static void tick_suspend(void)
@@ -330,9 +330,9 @@ static void tick_suspend(void)
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
unsigned long flags;
- spin_lock_irqsave(&tick_device_lock, flags);
+ raw_spin_lock_irqsave(&tick_device_lock, flags);
clockevents_shutdown(td->evtdev);
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
static void tick_resume(void)
@@ -341,7 +341,7 @@ static void tick_resume(void)
unsigned long flags;
int broadcast = tick_resume_broadcast();
- spin_lock_irqsave(&tick_device_lock, flags);
+ raw_spin_lock_irqsave(&tick_device_lock, flags);
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
if (!broadcast) {
@@ -350,7 +350,7 @@ static void tick_resume(void)
else
tick_resume_oneshot();
}
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index b1c05bf75ee0..290eefbc1f60 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -6,7 +6,6 @@
#define TICK_DO_TIMER_BOOT -2
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
-extern spinlock_t tick_device_lock;
extern ktime_t tick_next_period;
extern ktime_t tick_period;
extern int tick_do_timer_cpu __read_mostly;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index a96c0e2b89cf..0a8a213016f0 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
dev->min_delta_ns += dev->min_delta_ns >> 1;
printk(KERN_WARNING
- "CE: %s increasing min_delta_ns to %lu nsec\n",
+ "CE: %s increasing min_delta_ns to %llu nsec\n",
dev->name ? dev->name : "?",
- dev->min_delta_ns << 1);
+ (unsigned long long) dev->min_delta_ns << 1);
i = 0;
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89aed5933ed4..f992762d7f51 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz);
* value. We do this unconditionally on any cpu, as we don't know whether the
* cpu, which has the update task assigned is in a long sleep.
*/
-static void tick_nohz_update_jiffies(void)
+static void tick_nohz_update_jiffies(ktime_t now)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
unsigned long flags;
- ktime_t now;
-
- if (!ts->tick_stopped)
- return;
cpumask_clear_cpu(cpu, nohz_cpu_mask);
- now = ktime_get();
ts->idle_waketime = now;
local_irq_save(flags);
@@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void)
touch_softlockup_watchdog();
}
-static void tick_nohz_stop_idle(int cpu)
+static void tick_nohz_stop_idle(int cpu, ktime_t now)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t delta;
- if (ts->idle_active) {
- ktime_t now, delta;
- now = ktime_get();
- delta = ktime_sub(now, ts->idle_entrytime);
- ts->idle_lastupdate = now;
- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
- ts->idle_active = 0;
+ delta = ktime_sub(now, ts->idle_entrytime);
+ ts->idle_lastupdate = now;
+ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+ ts->idle_active = 0;
- sched_clock_idle_wakeup_event(0);
- }
+ sched_clock_idle_wakeup_event(0);
}
static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
@@ -216,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle)
struct tick_sched *ts;
ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+ u64 time_delta;
int cpu;
local_irq_save(flags);
@@ -263,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle)
if (ratelimit < 10) {
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
- local_softirq_pending());
+ (unsigned int) local_softirq_pending());
ratelimit++;
}
goto end;
@@ -275,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle)
seq = read_seqbegin(&xtime_lock);
last_update = last_jiffies_update;
last_jiffies = jiffies;
+ time_delta = timekeeping_max_deferment();
} while (read_seqretry(&xtime_lock, seq));
- /* Get the next timer wheel timer */
- next_jiffies = get_next_timer_interrupt(last_jiffies);
- delta_jiffies = next_jiffies - last_jiffies;
-
- if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
+ if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+ arch_needs_cpu(cpu)) {
+ next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
+ } else {
+ /* Get the next timer wheel timer */
+ next_jiffies = get_next_timer_interrupt(last_jiffies);
+ delta_jiffies = next_jiffies - last_jiffies;
+ }
/*
* Do not stop the tick, if we are only one off
* or if the cpu is required for rcu
@@ -294,22 +291,51 @@ void tick_nohz_stop_sched_tick(int inidle)
if ((long)delta_jiffies >= 1) {
/*
- * calculate the expiry time for the next timer wheel
- * timer
- */
- expires = ktime_add_ns(last_update, tick_period.tv64 *
- delta_jiffies);
-
- /*
* If this cpu is the one which updates jiffies, then
* give up the assignment and let it be taken by the
* cpu which runs the tick timer next, which might be
* this cpu as well. If we don't drop this here the
* jiffies might be stale and do_timer() never
- * invoked.
+ * invoked. Keep track of the fact that it was the one
+ * which had the do_timer() duty last. If this cpu is
+ * the one which had the do_timer() duty last, we
+ * limit the sleep time to the timekeeping
+ * max_deferment value which we retrieved
+ * above. Otherwise we can sleep as long as we want.
*/
- if (cpu == tick_do_timer_cpu)
+ if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ ts->do_timer_last = 1;
+ } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ time_delta = KTIME_MAX;
+ ts->do_timer_last = 0;
+ } else if (!ts->do_timer_last) {
+ time_delta = KTIME_MAX;
+ }
+
+ /*
+ * calculate the expiry time for the next timer wheel
+ * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
+ * that there is no timer pending or at least extremely
+ * far into the future (12 days for HZ=1000). In this
+ * case we set the expiry to the end of time.
+ */
+ if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
+ /*
+ * Calculate the time delta for the next timer event.
+ * If the time delta exceeds the maximum time delta
+ * permitted by the current clocksource then adjust
+ * the time delta accordingly to ensure the
+ * clocksource does not wrap.
+ */
+ time_delta = min_t(u64, time_delta,
+ tick_period.tv64 * delta_jiffies);
+ }
+
+ if (time_delta < KTIME_MAX)
+ expires = ktime_add_ns(last_update, time_delta);
+ else
+ expires.tv64 = KTIME_MAX;
if (delta_jiffies > 1)
cpumask_set_cpu(cpu, nohz_cpu_mask);
@@ -342,22 +368,19 @@ void tick_nohz_stop_sched_tick(int inidle)
ts->idle_sleeps++;
+ /* Mark expires */
+ ts->idle_expires = expires;
+
/*
- * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
- * there is no timer pending or at least extremly far
- * into the future (12 days for HZ=1000). In this case
- * we simply stop the tick timer:
+ * If the expiration time == KTIME_MAX, then
+ * in this case we simply stop the tick timer.
*/
- if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
- ts->idle_expires.tv64 = KTIME_MAX;
+ if (unlikely(expires.tv64 == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer);
goto out;
}
- /* Mark expiries */
- ts->idle_expires = expires;
-
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS_PINNED);
@@ -436,7 +459,11 @@ void tick_nohz_restart_sched_tick(void)
ktime_t now;
local_irq_disable();
- tick_nohz_stop_idle(cpu);
+ if (ts->idle_active || (ts->inidle && ts->tick_stopped))
+ now = ktime_get();
+
+ if (ts->idle_active)
+ tick_nohz_stop_idle(cpu, now);
if (!ts->inidle || !ts->tick_stopped) {
ts->inidle = 0;
@@ -450,7 +477,6 @@ void tick_nohz_restart_sched_tick(void)
/* Update jiffies first */
select_nohz_load_balancer(0);
- now = ktime_get();
tick_do_update_jiffies64(now);
cpumask_clear_cpu(cpu, nohz_cpu_mask);
@@ -584,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void)
* timer and do not touch the other magic bits which need to be done
* when idle is left.
*/
-static void tick_nohz_kick_tick(int cpu)
+static void tick_nohz_kick_tick(int cpu, ktime_t now)
{
#if 0
/* Switch back to 2.6.27 behaviour */
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
- ktime_t delta, now;
-
- if (!ts->tick_stopped)
- return;
+ ktime_t delta;
/*
* Do not touch the tick device, when the next expiry is either
* already reached or less/equal than the tick period.
*/
- now = ktime_get();
delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
if (delta.tv64 <= tick_period.tv64)
return;
@@ -608,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu)
#endif
}
+/*
+ * NOHZ part of tick_check_idle(): returns early when the cpu is neither
+ * accounted as idle (idle_active) nor has its tick stopped. Otherwise the
+ * current time is read once with ktime_get() and handed to each helper,
+ * so the helpers no longer read the clock themselves.
+ */
+static inline void tick_check_nohz(int cpu)
+{
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t now;
+
+ if (!ts->idle_active && !ts->tick_stopped)
+ return;
+ now = ktime_get();
+ if (ts->idle_active)
+ tick_nohz_stop_idle(cpu, now);
+ if (ts->tick_stopped) {
+ tick_nohz_update_jiffies(now);
+ tick_nohz_kick_tick(cpu, now);
+ }
+}
+
#else
static inline void tick_nohz_switch_to_nohz(void) { }
+static inline void tick_check_nohz(int cpu) { }
#endif /* NO_HZ */
@@ -620,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
void tick_check_idle(int cpu)
{
tick_check_oneshot_broadcast(cpu);
-#ifdef CONFIG_NO_HZ
- tick_nohz_stop_idle(cpu);
- tick_nohz_update_jiffies();
- tick_nohz_kick_tick(cpu);
-#endif
+ tick_check_nohz(cpu);
}
/*
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index 71e7f1a19156..12f5c55090be 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -40,7 +40,7 @@ ktime_t timecompare_transform(struct timecompare *sync,
return ns_to_ktime(nsec);
}
-EXPORT_SYMBOL(timecompare_transform);
+EXPORT_SYMBOL_GPL(timecompare_transform);
int timecompare_offset(struct timecompare *sync,
s64 *offset,
@@ -89,7 +89,7 @@ int timecompare_offset(struct timecompare *sync,
* source time
*/
sample.offset =
- ktime_to_ns(ktime_add(end, start)) / 2 -
+ (ktime_to_ns(end) + ktime_to_ns(start)) / 2 -
ts;
/* simple insertion sort based on duration */
@@ -131,7 +131,7 @@ int timecompare_offset(struct timecompare *sync,
return used;
}
-EXPORT_SYMBOL(timecompare_offset);
+EXPORT_SYMBOL_GPL(timecompare_offset);
void __timecompare_update(struct timecompare *sync,
u64 source_tstamp)
@@ -188,4 +188,4 @@ void __timecompare_update(struct timecompare *sync,
}
}
}
-EXPORT_SYMBOL(__timecompare_update);
+EXPORT_SYMBOL_GPL(__timecompare_update);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c3a4e2907eaa..af4135f05825 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,19 +165,12 @@ struct timespec raw_time;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
-static struct timespec xtime_cache __attribute__ ((aligned (16)));
-void update_xtime_cache(u64 nsec)
-{
- xtime_cache = xtime;
- timespec_add_ns(&xtime_cache, nsec);
-}
-
/* must hold xtime_lock */
void timekeeping_leap_insert(int leapsecond)
{
xtime.tv_sec += leapsecond;
wall_to_monotonic.tv_sec -= leapsecond;
- update_vsyscall(&xtime, timekeeper.clock);
+ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
}
#ifdef CONFIG_GENERIC_TIME
@@ -332,12 +325,10 @@ int do_settimeofday(struct timespec *tv)
xtime = *tv;
- update_xtime_cache(0);
-
timekeeper.ntp_error = 0;
ntp_clear();
- update_vsyscall(&xtime, timekeeper.clock);
+ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -488,6 +479,17 @@ int timekeeping_valid_for_hres(void)
}
/**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Returns the max_idle_ns value (nanoseconds) of the current timekeeper
+ * clocksource. Caller must observe xtime_lock via read_seqbegin/read_seqretry
+ * to ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+ return timekeeper.clock->max_idle_ns;
+}
+
+/**
* read_persistent_clock - Return time from the persistent clock.
*
* Weak dummy function for arches that do not yet support it.
@@ -548,7 +550,6 @@ void __init timekeeping_init(void)
}
set_normalized_timespec(&wall_to_monotonic,
-boot.tv_sec, -boot.tv_nsec);
- update_xtime_cache(0);
total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -582,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
total_sleep_time = timespec_add_safe(total_sleep_time, ts);
}
- update_xtime_cache(0);
/* re-base the last cycle value */
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
timekeeper.ntp_error = 0;
@@ -723,6 +723,49 @@ static void timekeeping_adjust(s64 offset)
}
/**
+ * logarithmic_accumulation - shifted accumulation of cycles
+ * @offset: cycles which have not been accumulated yet
+ * @shift: accumulate (cycle_interval << shift) cycles in one step
+ *
+ * This function accumulates a shifted interval of cycles into
+ * a shifted interval of nanoseconds. Allows for an O(log) accumulation
+ * loop.
+ *
+ * Returns the unconsumed cycles.
+ */
+static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
+{
+ u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
+ u64 raw_nsecs;
+
+ /* If the offset is smaller than a shifted interval, do nothing */
+ if (offset < timekeeper.cycle_interval<<shift)
+ return offset;
+
+ /* Accumulate one shifted interval */
+ offset -= timekeeper.cycle_interval << shift;
+ timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
+
+ timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+ while (timekeeper.xtime_nsec >= nsecps) {
+ timekeeper.xtime_nsec -= nsecps;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+
+ /*
+ * Accumulate into raw time. The sum is built in a 64-bit variable
+ * because the shifted interval added to tv_nsec may not fit into
+ * tv_nsec itself on architectures where long is 32 bits wide.
+ */
+ raw_nsecs = (u64)timekeeper.raw_interval << shift;
+ raw_nsecs += raw_time.tv_nsec;
+ while (raw_nsecs >= NSEC_PER_SEC) {
+ raw_nsecs -= NSEC_PER_SEC;
+ raw_time.tv_sec++;
+ }
+ raw_time.tv_nsec = raw_nsecs;
+
+ /* Accumulate error between NTP and clock interval */
+ timekeeper.ntp_error += tick_length << shift;
+ timekeeper.ntp_error -= timekeeper.xtime_interval <<
+ (timekeeper.ntp_error_shift + shift);
+
+ return offset;
+}
+
+/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
* Called from the timer interrupt, must hold a write on xtime_lock.
@@ -731,7 +774,7 @@ void update_wall_time(void)
{
struct clocksource *clock;
cycle_t offset;
- u64 nsecs;
+ int shift = 0, maxshift;
/* Make sure we're fully resumed: */
if (unlikely(timekeeping_suspended))
@@ -745,33 +788,22 @@ void update_wall_time(void)
#endif
timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
- /* normally this loop will run just once, however in the
- * case of lost or late ticks, it will accumulate correctly.
+ /*
+ * With NO_HZ we may have to accumulate many cycle_intervals
+ * (think "ticks") worth of time at once. To do this efficiently,
+ * we calculate the largest doubling multiple of cycle_intervals
+ * that is smaller than the offset. We then accumulate that
+ * chunk in one go, and then try to consume the next smaller
+ * doubled multiple.
*/
+ shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+ shift = max(0, shift);
+ /* Bound shift to one less than what overflows tick_length */
+ maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+ shift = min(shift, maxshift);
while (offset >= timekeeper.cycle_interval) {
- u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
-
- /* accumulate one interval */
- offset -= timekeeper.cycle_interval;
- clock->cycle_last += timekeeper.cycle_interval;
-
- timekeeper.xtime_nsec += timekeeper.xtime_interval;
- if (timekeeper.xtime_nsec >= nsecps) {
- timekeeper.xtime_nsec -= nsecps;
- xtime.tv_sec++;
- second_overflow();
- }
-
- raw_time.tv_nsec += timekeeper.raw_interval;
- if (raw_time.tv_nsec >= NSEC_PER_SEC) {
- raw_time.tv_nsec -= NSEC_PER_SEC;
- raw_time.tv_sec++;
- }
-
- /* accumulate error between NTP and clock interval */
- timekeeper.ntp_error += tick_length;
- timekeeper.ntp_error -= timekeeper.xtime_interval <<
- timekeeper.ntp_error_shift;
+ offset = logarithmic_accumulation(offset, shift);
+ shift--;
}
/* correct the clock when NTP error is too big */
@@ -807,11 +839,8 @@ void update_wall_time(void)
timekeeper.ntp_error += timekeeper.xtime_nsec <<
timekeeper.ntp_error_shift;
- nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
- update_xtime_cache(nsecs);
-
/* check to see if there is a new clocksource to use */
- update_vsyscall(&xtime, timekeeper.clock);
+ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
}
/**
@@ -846,13 +875,13 @@ void monotonic_to_bootbased(struct timespec *ts)
unsigned long get_seconds(void)
{
- return xtime_cache.tv_sec;
+ return xtime.tv_sec;
}
EXPORT_SYMBOL(get_seconds);
struct timespec __current_kernel_time(void)
{
- return xtime_cache;
+ return xtime;
}
struct timespec current_kernel_time(void)
@@ -862,8 +891,7 @@ struct timespec current_kernel_time(void)
do {
seq = read_seqbegin(&xtime_lock);
-
- now = xtime_cache;
+ now = xtime;
} while (read_seqretry(&xtime_lock, seq));
return now;
@@ -877,8 +905,7 @@ struct timespec get_monotonic_coarse(void)
do {
seq = read_seqbegin(&xtime_lock);
-
- now = xtime_cache;
+ now = xtime;
mono = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq));
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 1b5b7aa2fdfd..bdfb8dd1050c 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
next_one:
i = 0;
- spin_lock_irqsave(&base->cpu_base->lock, flags);
+ raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
curr = base->first;
/*
@@ -100,13 +100,13 @@ next_one:
timer = rb_entry(curr, struct hrtimer, node);
tmp = *timer;
- spin_unlock_irqrestore(&base->cpu_base->lock, flags);
+ raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
print_timer(m, timer, &tmp, i, now);
next++;
goto next_one;
}
- spin_unlock_irqrestore(&base->cpu_base->lock, flags);
+ raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
}
static void
@@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
P_ns(expires_next);
P(hres_active);
P(nr_events);
+ P(nr_retries);
+ P(nr_hangs);
+ P_ns(max_hang_time);
#endif
#undef P
#undef P_ns
@@ -204,10 +207,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
return;
}
SEQ_printf(m, "%s\n", dev->name);
- SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns);
- SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns);
- SEQ_printf(m, " mult: %lu\n", dev->mult);
- SEQ_printf(m, " shift: %d\n", dev->shift);
+ SEQ_printf(m, " max_delta_ns: %llu\n",
+ (unsigned long long) dev->max_delta_ns);
+ SEQ_printf(m, " min_delta_ns: %llu\n",
+ (unsigned long long) dev->min_delta_ns);
+ SEQ_printf(m, " mult: %u\n", dev->mult);
+ SEQ_printf(m, " shift: %u\n", dev->shift);
SEQ_printf(m, " mode: %d\n", dev->mode);
SEQ_printf(m, " next_event: %Ld nsecs\n",
(unsigned long long) ktime_to_ns(dev->next_event));
@@ -232,10 +237,10 @@ static void timer_list_show_tickdevices(struct seq_file *m)
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device(), -1);
SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
- tick_get_broadcast_mask()->bits[0]);
+ cpumask_bits(tick_get_broadcast_mask())[0]);
#ifdef CONFIG_TICK_ONESHOT
SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
- tick_get_broadcast_oneshot_mask()->bits[0]);
+ cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
#endif
SEQ_printf(m, "\n");
#endif
@@ -252,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Timer List Version: v0.4\n");
+ SEQ_printf(m, "Timer List Version: v0.5\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index ee5681f8d7ec..2f3b585b8d7d 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock);
/*
* Per-CPU lookup locks for fast hash lookup:
*/
-static DEFINE_PER_CPU(spinlock_t, lookup_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
/*
* Mutex to serialize state changes with show-stats activities:
@@ -238,14 +238,14 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
/*
* It doesnt matter which lock we take:
*/
- spinlock_t *lock;
+ raw_spinlock_t *lock;
struct entry *entry, input;
unsigned long flags;
if (likely(!timer_stats_active))
return;
- lock = &per_cpu(lookup_lock, raw_smp_processor_id());
+ lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
input.timer = timer;
input.start_func = startf;
@@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
input.pid = pid;
input.timer_flag = timer_flag;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
if (!timer_stats_active)
goto out_unlock;
@@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
atomic_inc(&overflow_count);
out_unlock:
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
static void print_name_offset(struct seq_file *m, unsigned long addr)
@@ -348,9 +348,11 @@ static void sync_access(void)
int cpu;
for_each_online_cpu(cpu) {
- spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
+ raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
+
+ raw_spin_lock_irqsave(lock, flags);
/* nothing */
- spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
}
@@ -408,7 +410,7 @@ void __init init_timer_stats(void)
int cpu;
for_each_possible_cpu(cpu)
- spin_lock_init(&per_cpu(lookup_lock, cpu));
+ raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
}
static int __init init_tstats_procfs(void)
diff --git a/kernel/timer.c b/kernel/timer.c
index 5db5a8d26811..15533b792397 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -656,8 +656,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- new_base = __get_cpu_var(tvec_bases);
-
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e51a1bcb7bed..7968762c8167 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1724,7 +1724,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
return ftrace_match(str, regex, len, type);
}
-static void ftrace_match_records(char *buff, int len, int enable)
+static int ftrace_match_records(char *buff, int len, int enable)
{
unsigned int search_len;
struct ftrace_page *pg;
@@ -1733,6 +1733,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
char *search;
int type;
int not;
+ int found = 0;
flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
type = filter_parse_regex(buff, len, &search, &not);
@@ -1750,6 +1751,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
rec->flags &= ~flag;
else
rec->flags |= flag;
+ found = 1;
}
/*
* Only enable filtering if we have a function that
@@ -1759,6 +1761,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
ftrace_filtered = 1;
} while_for_each_ftrace_rec();
mutex_unlock(&ftrace_lock);
+
+ return found;
}
static int
@@ -1780,7 +1784,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
return 1;
}
-static void ftrace_match_module_records(char *buff, char *mod, int enable)
+static int ftrace_match_module_records(char *buff, char *mod, int enable)
{
unsigned search_len = 0;
struct ftrace_page *pg;
@@ -1789,6 +1793,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
char *search = buff;
unsigned long flag;
int not = 0;
+ int found = 0;
flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
@@ -1819,12 +1824,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
rec->flags &= ~flag;
else
rec->flags |= flag;
+ found = 1;
}
if (enable && (rec->flags & FTRACE_FL_FILTER))
ftrace_filtered = 1;
} while_for_each_ftrace_rec();
mutex_unlock(&ftrace_lock);
+
+ return found;
}
/*
@@ -1853,8 +1861,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
if (!strlen(mod))
return -EINVAL;
- ftrace_match_module_records(func, mod, enable);
- return 0;
+ if (ftrace_match_module_records(func, mod, enable))
+ return 0;
+ return -EINVAL;
}
static struct ftrace_func_command ftrace_mod_cmd = {
@@ -2151,8 +2160,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
func = strsep(&next, ":");
if (!next) {
- ftrace_match_records(func, len, enable);
- return 0;
+ if (ftrace_match_records(func, len, enable))
+ return 0;
+ return ret;
}
/* command found */
@@ -2198,10 +2208,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
!trace_parser_cont(parser)) {
ret = ftrace_process_regex(parser->buffer,
parser->idx, enable);
+ trace_parser_clear(parser);
if (ret)
goto out_unlock;
-
- trace_parser_clear(parser);
}
ret = read;
@@ -2543,10 +2552,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
exists = true;
break;
}
- if (!exists) {
+ if (!exists)
array[(*idx)++] = rec->ip;
- found = 1;
- }
+ found = 1;
}
} while_for_each_ftrace_rec();
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..9f4f565b01e6 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
#define CREATE_TRACE_POINTS
#include <trace/events/power.h>
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a1ca4956ab5e..2326b04c95c4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -423,7 +423,7 @@ struct ring_buffer_per_cpu {
int cpu;
struct ring_buffer *buffer;
spinlock_t reader_lock; /* serialize readers */
- raw_spinlock_t lock;
+ arch_spinlock_t lock;
struct lock_class_key lock_key;
struct list_head *pages;
struct buffer_page *head_page; /* read from head */
@@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
cpu_buffer->buffer = buffer;
spin_lock_init(&cpu_buffer->reader_lock);
lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
- cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
@@ -1193,9 +1193,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
struct list_head *p;
unsigned i;
- atomic_inc(&cpu_buffer->record_disabled);
- synchronize_sched();
-
spin_lock_irq(&cpu_buffer->reader_lock);
rb_head_page_deactivate(cpu_buffer);
@@ -1211,12 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
return;
rb_reset_cpu(cpu_buffer);
- spin_unlock_irq(&cpu_buffer->reader_lock);
-
rb_check_pages(cpu_buffer);
- atomic_dec(&cpu_buffer->record_disabled);
-
+ spin_unlock_irq(&cpu_buffer->reader_lock);
}
static void
@@ -1227,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
struct list_head *p;
unsigned i;
- atomic_inc(&cpu_buffer->record_disabled);
- synchronize_sched();
-
spin_lock_irq(&cpu_buffer->reader_lock);
rb_head_page_deactivate(cpu_buffer);
@@ -1242,11 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
list_add_tail(&bpage->list, cpu_buffer->pages);
}
rb_reset_cpu(cpu_buffer);
- spin_unlock_irq(&cpu_buffer->reader_lock);
-
rb_check_pages(cpu_buffer);
- atomic_dec(&cpu_buffer->record_disabled);
+ spin_unlock_irq(&cpu_buffer->reader_lock);
}
/**
@@ -1254,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
* @buffer: the buffer to resize.
* @size: the new size.
*
- * The tracer is responsible for making sure that the buffer is
- * not being used while changing the size.
- * Note: We may be able to change the above requirement by using
- * RCU synchronizations.
- *
* Minimum size is 2 * BUF_PAGE_SIZE.
*
* Returns -1 on failure.
@@ -1290,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
if (size == buffer_size)
return size;
+ atomic_inc(&buffer->record_disabled);
+
+ /* Make sure all writers are done with this buffer. */
+ synchronize_sched();
+
mutex_lock(&buffer->mutex);
get_online_cpus();
@@ -1352,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
put_online_cpus();
mutex_unlock(&buffer->mutex);
+ atomic_dec(&buffer->record_disabled);
+
return size;
free_pages:
@@ -1361,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
}
put_online_cpus();
mutex_unlock(&buffer->mutex);
+ atomic_dec(&buffer->record_disabled);
return -ENOMEM;
/*
@@ -1370,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
out_fail:
put_online_cpus();
mutex_unlock(&buffer->mutex);
+ atomic_dec(&buffer->record_disabled);
return -1;
}
EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -2834,7 +2827,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
int ret;
local_irq_save(flags);
- __raw_spin_lock(&cpu_buffer->lock);
+ arch_spin_lock(&cpu_buffer->lock);
again:
/*
@@ -2923,7 +2916,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto again;
out:
- __raw_spin_unlock(&cpu_buffer->lock);
+ arch_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
return reader;
@@ -3286,9 +3279,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
synchronize_sched();
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
- __raw_spin_lock(&cpu_buffer->lock);
+ arch_spin_lock(&cpu_buffer->lock);
rb_iter_reset(iter);
- __raw_spin_unlock(&cpu_buffer->lock);
+ arch_spin_unlock(&cpu_buffer->lock);
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return iter;
@@ -3408,11 +3401,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
goto out;
- __raw_spin_lock(&cpu_buffer->lock);
+ arch_spin_lock(&cpu_buffer->lock);
rb_reset_cpu(cpu_buffer);
- __raw_spin_unlock(&cpu_buffer->lock);
+ arch_spin_unlock(&cpu_buffer->lock);
out:
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 874f2893cff0..8b9f20ab8eed 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/ring_buffer.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
@@ -86,17 +86,17 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
*/
static int tracing_disabled = 1;
-DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+DEFINE_PER_CPU(int, ftrace_cpu_disabled);
static inline void ftrace_disable_cpu(void)
{
preempt_disable();
- local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+ __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
}
static inline void ftrace_enable_cpu(void)
{
- local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+ __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
preempt_enable();
}
@@ -203,7 +203,7 @@ cycle_t ftrace_now(int cpu)
*/
static struct trace_array max_tr;
-static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
+static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
/* tracer_enabled is used to toggle activation of a tracer */
static int tracer_enabled = 1;
@@ -313,7 +313,6 @@ static const char *trace_options[] = {
"bin",
"block",
"stacktrace",
- "sched-tree",
"trace_printk",
"ftrace_preempt",
"branch",
@@ -493,15 +492,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
* protected by per_cpu spinlocks. But the action of the swap
* needs its own lock.
*
- * This is defined as a raw_spinlock_t in order to help
+ * This is defined as a arch_spinlock_t in order to help
* with performance when lockdep debugging is enabled.
*
* It is also used in other places outside the update_max_tr
* so it needs to be defined outside of the
* CONFIG_TRACER_MAX_TRACE.
*/
-static raw_spinlock_t ftrace_max_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t ftrace_max_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
unsigned long __read_mostly tracing_max_latency;
@@ -555,13 +554,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
max_tr.buffer = buf;
__update_max_tr(tr, tsk, cpu);
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
}
/**
@@ -581,7 +580,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
ftrace_disable_cpu();
@@ -603,7 +602,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
__update_max_tr(tr, tsk, cpu);
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
}
#endif /* CONFIG_TRACER_MAX_TRACE */
@@ -802,7 +801,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
static int cmdline_idx;
-static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +914,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
* nor do we want to disable interrupts,
* so if we miss here, then better luck next time.
*/
- if (!__raw_spin_trylock(&trace_cmdline_lock))
+ if (!arch_spin_trylock(&trace_cmdline_lock))
return;
idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +939,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
- __raw_spin_unlock(&trace_cmdline_lock);
+ arch_spin_unlock(&trace_cmdline_lock);
}
void trace_find_cmdline(int pid, char comm[])
@@ -958,14 +957,14 @@ void trace_find_cmdline(int pid, char comm[])
}
preempt_disable();
- __raw_spin_lock(&trace_cmdline_lock);
+ arch_spin_lock(&trace_cmdline_lock);
map = map_pid_to_cmdline[pid];
if (map != NO_CMDLINE_MAP)
strcpy(comm, saved_cmdlines[map]);
else
strcpy(comm, "<...>");
- __raw_spin_unlock(&trace_cmdline_lock);
+ arch_spin_unlock(&trace_cmdline_lock);
preempt_enable();
}
@@ -1085,7 +1084,7 @@ trace_function(struct trace_array *tr,
struct ftrace_entry *entry;
/* If we are reading the ring buffer, don't trace */
- if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
return;
event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1151,6 +1150,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
__ftrace_trace_stack(tr->buffer, flags, skip, pc);
}
+/**
+ * trace_dump_stack - record a stack back trace in the trace buffer
+ */
+void trace_dump_stack(void)
+{
+ unsigned long flags;
+
+ if (tracing_disabled || tracing_selftest_running)
+ return;
+
+ local_save_flags(flags);
+
+ /* skipping 3 traces, seems to get us at the caller of this function */
+ __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+}
+
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
@@ -1251,8 +1266,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
*/
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
- static raw_spinlock_t trace_buf_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t trace_buf_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static u32 trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1298,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
/* Lockdep uses trace_printk for lock tracing */
local_irq_save(flags);
- __raw_spin_lock(&trace_buf_lock);
+ arch_spin_lock(&trace_buf_lock);
len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
if (len > TRACE_BUF_SIZE || len < 0)
@@ -1304,7 +1319,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
ring_buffer_unlock_commit(buffer, event);
out_unlock:
- __raw_spin_unlock(&trace_buf_lock);
+ arch_spin_unlock(&trace_buf_lock);
local_irq_restore(flags);
out:
@@ -1334,7 +1349,7 @@ int trace_array_printk(struct trace_array *tr,
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
- static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static char trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1375,8 @@ int trace_array_vprintk(struct trace_array *tr,
pause_graph_tracing();
raw_local_irq_save(irq_flags);
- __raw_spin_lock(&trace_buf_lock);
- if (args == NULL) {
- strncpy(trace_buf, fmt, TRACE_BUF_SIZE);
- len = strlen(trace_buf);
- } else
- len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+ arch_spin_lock(&trace_buf_lock);
+ len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
size = sizeof(*entry) + len + 1;
buffer = tr->buffer;
@@ -1382,7 +1393,7 @@ int trace_array_vprintk(struct trace_array *tr,
ring_buffer_unlock_commit(buffer, event);
out_unlock:
- __raw_spin_unlock(&trace_buf_lock);
+ arch_spin_unlock(&trace_buf_lock);
raw_local_irq_restore(irq_flags);
unpause_graph_tracing();
out:
@@ -1516,6 +1527,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
int i = (int)*pos;
void *ent;
+ WARN_ON_ONCE(iter->leftover);
+
(*pos)++;
/* can't go backwards */
@@ -1614,8 +1627,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
;
} else {
- l = *pos - 1;
- p = s_next(m, p, &l);
+ /*
+ * If we overflowed the seq_file before, then we want
+ * to just reuse the trace_seq buffer again.
+ */
+ if (iter->leftover)
+ p = iter;
+ else {
+ l = *pos - 1;
+ p = s_next(m, p, &l);
+ }
}
trace_event_read_lock();
@@ -1923,6 +1944,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
static int s_show(struct seq_file *m, void *v)
{
struct trace_iterator *iter = v;
+ int ret;
if (iter->ent == NULL) {
if (iter->tr) {
@@ -1942,9 +1964,27 @@ static int s_show(struct seq_file *m, void *v)
if (!(trace_flags & TRACE_ITER_VERBOSE))
print_func_help_header(m);
}
+ } else if (iter->leftover) {
+ /*
+ * If we filled the seq_file buffer earlier, we
+ * want to just show it now.
+ */
+ ret = trace_print_seq(m, &iter->seq);
+
+ /* ret should this time be zero, but you never know */
+ iter->leftover = ret;
+
} else {
print_trace_line(iter);
- trace_print_seq(m, &iter->seq);
+ ret = trace_print_seq(m, &iter->seq);
+ /*
+ * If we overflow the seq_file buffer, then it will
+ * ask us for this data again at start up.
+ * Use that instead.
+ * ret is 0 if seq_file write succeeded.
+ * -1 otherwise.
+ */
+ iter->leftover = ret;
}
return 0;
@@ -2254,7 +2294,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
mutex_lock(&tracing_cpumask_update_lock);
local_irq_disable();
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
/*
* Increase/decrease the disabled counter if we are
@@ -2269,7 +2309,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
atomic_dec(&global_trace.data[cpu]->disabled);
}
}
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
local_irq_enable();
cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2291,67 +2331,49 @@ static const struct file_operations tracing_cpumask_fops = {
.write = tracing_cpumask_write,
};
-static ssize_t
-tracing_trace_options_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int tracing_trace_options_show(struct seq_file *m, void *v)
{
struct tracer_opt *trace_opts;
u32 tracer_flags;
- int len = 0;
- char *buf;
- int r = 0;
int i;
-
- /* calculate max size */
- for (i = 0; trace_options[i]; i++) {
- len += strlen(trace_options[i]);
- len += 3; /* "no" and newline */
- }
-
mutex_lock(&trace_types_lock);
tracer_flags = current_trace->flags->val;
trace_opts = current_trace->flags->opts;
- /*
- * Increase the size with names of options specific
- * of the current tracer.
- */
- for (i = 0; trace_opts[i].name; i++) {
- len += strlen(trace_opts[i].name);
- len += 3; /* "no" and newline */
- }
-
- /* +1 for \0 */
- buf = kmalloc(len + 1, GFP_KERNEL);
- if (!buf) {
- mutex_unlock(&trace_types_lock);
- return -ENOMEM;
- }
-
for (i = 0; trace_options[i]; i++) {
if (trace_flags & (1 << i))
- r += sprintf(buf + r, "%s\n", trace_options[i]);
+ seq_printf(m, "%s\n", trace_options[i]);
else
- r += sprintf(buf + r, "no%s\n", trace_options[i]);
+ seq_printf(m, "no%s\n", trace_options[i]);
}
for (i = 0; trace_opts[i].name; i++) {
if (tracer_flags & trace_opts[i].bit)
- r += sprintf(buf + r, "%s\n",
- trace_opts[i].name);
+ seq_printf(m, "%s\n", trace_opts[i].name);
else
- r += sprintf(buf + r, "no%s\n",
- trace_opts[i].name);
+ seq_printf(m, "no%s\n", trace_opts[i].name);
}
mutex_unlock(&trace_types_lock);
- WARN_ON(r >= len + 1);
+ return 0;
+}
- r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+static int __set_tracer_option(struct tracer *trace,
+ struct tracer_flags *tracer_flags,
+ struct tracer_opt *opts, int neg)
+{
+ int ret;
- kfree(buf);
- return r;
+ ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
+ if (ret)
+ return ret;
+
+ if (neg)
+ tracer_flags->val &= ~opts->bit;
+ else
+ tracer_flags->val |= opts->bit;
+ return 0;
}
/* Try to assign a tracer specific option */
@@ -2359,33 +2381,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
{
struct tracer_flags *tracer_flags = trace->flags;
struct tracer_opt *opts = NULL;
- int ret = 0, i = 0;
- int len;
+ int i;
for (i = 0; tracer_flags->opts[i].name; i++) {
opts = &tracer_flags->opts[i];
- len = strlen(opts->name);
- if (strncmp(cmp, opts->name, len) == 0) {
- ret = trace->set_flag(tracer_flags->val,
- opts->bit, !neg);
- break;
- }
+ if (strcmp(cmp, opts->name) == 0)
+ return __set_tracer_option(trace, trace->flags,
+ opts, neg);
}
- /* Not found */
- if (!tracer_flags->opts[i].name)
- return -EINVAL;
-
- /* Refused to handle */
- if (ret)
- return ret;
-
- if (neg)
- tracer_flags->val &= ~opts->bit;
- else
- tracer_flags->val |= opts->bit;
- return 0;
+ return -EINVAL;
}
static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2405,7 +2411,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
- char *cmp = buf;
+ char *cmp;
int neg = 0;
int ret;
int i;
@@ -2417,16 +2423,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
return -EFAULT;
buf[cnt] = 0;
+ cmp = strstrip(buf);
- if (strncmp(buf, "no", 2) == 0) {
+ if (strncmp(cmp, "no", 2) == 0) {
neg = 1;
cmp += 2;
}
for (i = 0; trace_options[i]; i++) {
- int len = strlen(trace_options[i]);
-
- if (strncmp(cmp, trace_options[i], len) == 0) {
+ if (strcmp(cmp, trace_options[i]) == 0) {
set_tracer_flags(1 << i, !neg);
break;
}
@@ -2446,9 +2451,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
return cnt;
}
+static int tracing_trace_options_open(struct inode *inode, struct file *file)
+{
+ if (tracing_disabled)
+ return -ENODEV;
+ return single_open(file, tracing_trace_options_show, NULL);
+}
+
static const struct file_operations tracing_iter_fops = {
- .open = tracing_open_generic,
- .read = tracing_trace_options_read,
+ .open = tracing_trace_options_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
.write = tracing_trace_options_write,
};
@@ -2898,6 +2912,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
else
cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
+
+ if (iter->trace->pipe_close)
+ iter->trace->pipe_close(iter);
+
mutex_unlock(&trace_types_lock);
free_cpumask_var(iter->started);
@@ -3104,7 +3122,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
__free_page(spd->pages[idx]);
}
-static struct pipe_buf_operations tracing_pipe_buf_ops = {
+static const struct pipe_buf_operations tracing_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
@@ -3320,6 +3338,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
return cnt;
}
+static int mark_printk(const char *fmt, ...)
+{
+ int ret;
+ va_list args;
+ va_start(args, fmt);
+ ret = trace_vprintk(0, fmt, args);
+ va_end(args);
+ return ret;
+}
+
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
@@ -3346,28 +3374,25 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
} else
buf[cnt] = '\0';
- cnt = trace_vprintk(0, buf, NULL);
+ cnt = mark_printk("%s", buf);
kfree(buf);
*fpos += cnt;
return cnt;
}
-static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int tracing_clock_show(struct seq_file *m, void *v)
{
- char buf[64];
- int bufiter = 0;
int i;
for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
- bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
+ seq_printf(m,
"%s%s%s%s", i ? " " : "",
i == trace_clock_id ? "[" : "", trace_clocks[i].name,
i == trace_clock_id ? "]" : "");
- bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
+ seq_putc(m, '\n');
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
+ return 0;
}
static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
@@ -3409,6 +3434,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
return cnt;
}
+static int tracing_clock_open(struct inode *inode, struct file *file)
+{
+ if (tracing_disabled)
+ return -ENODEV;
+ return single_open(file, tracing_clock_show, NULL);
+}
+
static const struct file_operations tracing_max_lat_fops = {
.open = tracing_open_generic,
.read = tracing_max_lat_read,
@@ -3447,8 +3479,10 @@ static const struct file_operations tracing_mark_fops = {
};
static const struct file_operations trace_clock_fops = {
- .open = tracing_open_generic,
- .read = tracing_clock_read,
+ .open = tracing_clock_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
.write = tracing_clock_write,
};
@@ -3578,7 +3612,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
}
/* Pipe buffer operations for a buffer. */
-static struct pipe_buf_operations buffer_pipe_buf_ops = {
+static const struct pipe_buf_operations buffer_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
@@ -3909,39 +3943,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (ret < 0)
return ret;
- ret = 0;
- switch (val) {
- case 0:
- /* do nothing if already cleared */
- if (!(topt->flags->val & topt->opt->bit))
- break;
-
- mutex_lock(&trace_types_lock);
- if (current_trace->set_flag)
- ret = current_trace->set_flag(topt->flags->val,
- topt->opt->bit, 0);
- mutex_unlock(&trace_types_lock);
- if (ret)
- return ret;
- topt->flags->val &= ~topt->opt->bit;
- break;
- case 1:
- /* do nothing if already set */
- if (topt->flags->val & topt->opt->bit)
- break;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ if (!!(topt->flags->val & topt->opt->bit) != val) {
mutex_lock(&trace_types_lock);
- if (current_trace->set_flag)
- ret = current_trace->set_flag(topt->flags->val,
- topt->opt->bit, 1);
+ ret = __set_tracer_option(current_trace, topt->flags,
+ topt->opt, val);
mutex_unlock(&trace_types_lock);
if (ret)
return ret;
- topt->flags->val |= topt->opt->bit;
- break;
-
- default:
- return -EINVAL;
}
*ppos += cnt;
@@ -4268,8 +4279,8 @@ trace_printk_seq(struct trace_seq *s)
static void __ftrace_dump(bool disable_tracing)
{
- static raw_spinlock_t ftrace_dump_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t ftrace_dump_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
unsigned int old_userobj;
@@ -4279,7 +4290,7 @@ static void __ftrace_dump(bool disable_tracing)
/* only one dump */
local_irq_save(flags);
- __raw_spin_lock(&ftrace_dump_lock);
+ arch_spin_lock(&ftrace_dump_lock);
if (dump_ran)
goto out;
@@ -4354,7 +4365,7 @@ static void __ftrace_dump(bool disable_tracing)
}
out:
- __raw_spin_unlock(&ftrace_dump_lock);
+ arch_spin_unlock(&ftrace_dump_lock);
local_irq_restore(flags);
}
@@ -4415,7 +4426,7 @@ __init static int tracer_alloc_buffers(void)
/* Allocate the first page for all buffers */
for_each_tracing_cpu(i) {
global_trace.data[i] = &per_cpu(global_trace_cpu, i);
- max_tr.data[i] = &per_cpu(max_data, i);
+ max_tr.data[i] = &per_cpu(max_tr_data, i);
}
trace_init_cmdlines();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1d7f4830a80d..4df6a77eb196 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,6 +272,7 @@ struct tracer_flags {
* @pipe_open: called when the trace_pipe file is opened
* @wait_pipe: override how the user waits for traces on trace_pipe
* @close: called when the trace file is released
+ * @pipe_close: called when the trace_pipe file is released
* @read: override the default read callback on trace_pipe
* @splice_read: override the default splice_read callback on trace_pipe
* @selftest: selftest to run on boot (see trace_selftest.c)
@@ -290,6 +291,7 @@ struct tracer {
void (*pipe_open)(struct trace_iterator *iter);
void (*wait_pipe)(struct trace_iterator *iter);
void (*close)(struct trace_iterator *iter);
+ void (*pipe_close)(struct trace_iterator *iter);
ssize_t (*read)(struct trace_iterator *iter,
struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos);
@@ -441,7 +443,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
extern int ring_buffer_expanded;
extern bool tracing_selftest_disabled;
-DECLARE_PER_CPU(local_t, ftrace_cpu_disabled);
+DECLARE_PER_CPU(int, ftrace_cpu_disabled);
#ifdef CONFIG_FTRACE_STARTUP_TEST
extern int trace_selftest_startup_function(struct tracer *trace,
@@ -595,18 +597,17 @@ enum trace_iterator_flags {
TRACE_ITER_BIN = 0x40,
TRACE_ITER_BLOCK = 0x80,
TRACE_ITER_STACKTRACE = 0x100,
- TRACE_ITER_SCHED_TREE = 0x200,
- TRACE_ITER_PRINTK = 0x400,
- TRACE_ITER_PREEMPTONLY = 0x800,
- TRACE_ITER_BRANCH = 0x1000,
- TRACE_ITER_ANNOTATE = 0x2000,
- TRACE_ITER_USERSTACKTRACE = 0x4000,
- TRACE_ITER_SYM_USEROBJ = 0x8000,
- TRACE_ITER_PRINTK_MSGONLY = 0x10000,
- TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
- TRACE_ITER_LATENCY_FMT = 0x40000,
- TRACE_ITER_SLEEP_TIME = 0x80000,
- TRACE_ITER_GRAPH_TIME = 0x100000,
+ TRACE_ITER_PRINTK = 0x200,
+ TRACE_ITER_PREEMPTONLY = 0x400,
+ TRACE_ITER_BRANCH = 0x800,
+ TRACE_ITER_ANNOTATE = 0x1000,
+ TRACE_ITER_USERSTACKTRACE = 0x2000,
+ TRACE_ITER_SYM_USEROBJ = 0x4000,
+ TRACE_ITER_PRINTK_MSGONLY = 0x8000,
+ TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
+ TRACE_ITER_LATENCY_FMT = 0x20000,
+ TRACE_ITER_SLEEP_TIME = 0x40000,
+ TRACE_ITER_GRAPH_TIME = 0x80000,
};
/*
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 878c03f386ba..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -71,10 +71,10 @@ u64 notrace trace_clock(void)
/* keep prev_time and lock in the same cacheline. */
static struct {
u64 prev_time;
- raw_spinlock_t lock;
+ arch_spinlock_t lock;
} trace_clock_struct ____cacheline_aligned_in_smp =
{
- .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED,
+ .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
};
u64 notrace trace_clock_global(void)
@@ -94,7 +94,7 @@ u64 notrace trace_clock_global(void)
if (unlikely(in_nmi()))
goto out;
- __raw_spin_lock(&trace_clock_struct.lock);
+ arch_spin_lock(&trace_clock_struct.lock);
/*
* TODO: if this happens often then maybe we should reset
@@ -106,7 +106,7 @@ u64 notrace trace_clock_global(void)
trace_clock_struct.prev_time = now;
- __raw_spin_unlock(&trace_clock_struct.lock);
+ arch_spin_unlock(&trace_clock_struct.lock);
out:
raw_local_irq_restore(flags);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index d9c60f80aa0d..9e25573242cf 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
char *buf;
int ret = -ENOMEM;
- if (atomic_inc_return(&event->profile_count))
+ if (event->profile_count++ > 0)
return 0;
if (!total_profile_count) {
@@ -56,7 +56,7 @@ fail_buf_nmi:
perf_trace_buf = NULL;
}
fail_buf:
- atomic_dec(&event->profile_count);
+ event->profile_count--;
return ret;
}
@@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
{
char *buf, *nmi_buf;
- if (!atomic_add_negative(-1, &event->profile_count))
+ if (--event->profile_count > 0)
return;
event->profile_disable(event);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1d18315dc836..189b09baf4fb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
if (ret) \
return ret;
-int trace_define_common_fields(struct ftrace_event_call *call)
+static int trace_define_common_fields(struct ftrace_event_call *call)
{
int ret;
struct trace_entry ent;
@@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call)
return ret;
}
-EXPORT_SYMBOL_GPL(trace_define_common_fields);
void trace_destroy_fields(struct ftrace_event_call *call)
{
@@ -105,9 +104,25 @@ void trace_destroy_fields(struct ftrace_event_call *call)
}
}
-static void ftrace_event_enable_disable(struct ftrace_event_call *call,
+int trace_event_raw_init(struct ftrace_event_call *call)
+{
+ int id;
+
+ id = register_ftrace_event(call->event);
+ if (!id)
+ return -ENODEV;
+ call->id = id;
+ INIT_LIST_HEAD(&call->fields);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(trace_event_raw_init);
+
+static int ftrace_event_enable_disable(struct ftrace_event_call *call,
int enable)
{
+ int ret = 0;
+
switch (enable) {
case 0:
if (call->enabled) {
@@ -118,12 +133,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
break;
case 1:
if (!call->enabled) {
- call->enabled = 1;
tracing_start_cmdline_record();
- call->regfunc(call);
+ ret = call->regfunc(call);
+ if (ret) {
+ tracing_stop_cmdline_record();
+ pr_info("event trace: Could not enable event "
+ "%s\n", call->name);
+ break;
+ }
+ call->enabled = 1;
}
break;
}
+
+ return ret;
}
static void ftrace_clear_events(void)
@@ -402,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
case 0:
case 1:
mutex_lock(&event_mutex);
- ftrace_event_enable_disable(call, val);
+ ret = ftrace_event_enable_disable(call, val);
mutex_unlock(&event_mutex);
break;
@@ -412,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
*ppos += cnt;
- return cnt;
+ return ret ? ret : cnt;
}
static ssize_t
@@ -913,7 +936,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
id);
if (call->define_fields) {
- ret = call->define_fields(call);
+ ret = trace_define_common_fields(call);
+ if (!ret)
+ ret = call->define_fields(call);
if (ret < 0) {
pr_warning("Could not initialize trace point"
" events/%s\n", call->name);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index dff8c84ddf17..458e5bfe26d0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
struct struct_name field; \
int ret; \
\
- ret = trace_define_common_fields(event_call); \
- if (ret) \
- return ret; \
- \
tstruct; \
\
return ret; \
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01b2e4d..b1342c5d37cf 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
#include "trace.h"
#include "trace_output.h"
-struct fgraph_data {
+struct fgraph_cpu_data {
pid_t last_pid;
int depth;
+ int ignore;
+};
+
+struct fgraph_data {
+ struct fgraph_cpu_data *cpu_data;
+
+ /* Place to preserve last processed entry. */
+ struct ftrace_graph_ent_entry ent;
+ struct ftrace_graph_ret_entry ret;
+ int failed;
+ int cpu;
};
#define TRACE_GRAPH_INDENT 2
@@ -176,7 +187,7 @@ static int __trace_graph_entry(struct trace_array *tr,
struct ring_buffer *buffer = tr->buffer;
struct ftrace_graph_ent_entry *entry;
- if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
return 0;
event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -240,7 +251,7 @@ static void __trace_graph_return(struct trace_array *tr,
struct ring_buffer *buffer = tr->buffer;
struct ftrace_graph_ret_entry *entry;
- if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
return;
event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
if (!data)
return TRACE_TYPE_HANDLED;
- last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
+ last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
if (*last_pid == pid)
return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
get_return_for_leaf(struct trace_iterator *iter,
struct ftrace_graph_ent_entry *curr)
{
- struct ring_buffer_iter *ring_iter;
+ struct fgraph_data *data = iter->private;
+ struct ring_buffer_iter *ring_iter = NULL;
struct ring_buffer_event *event;
struct ftrace_graph_ret_entry *next;
- ring_iter = iter->buffer_iter[iter->cpu];
+ /*
+ * If the previous output failed to write to the seq buffer,
+ * then we just reuse the data from before.
+ */
+ if (data && data->failed) {
+ curr = &data->ent;
+ next = &data->ret;
+ } else {
- /* First peek to compare current entry and the next one */
- if (ring_iter)
- event = ring_buffer_iter_peek(ring_iter, NULL);
- else {
- /* We need to consume the current entry to see the next one */
- ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
- event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
- NULL);
- }
+ ring_iter = iter->buffer_iter[iter->cpu];
+
+ /* First peek to compare current entry and the next one */
+ if (ring_iter)
+ event = ring_buffer_iter_peek(ring_iter, NULL);
+ else {
+ /*
+ * We need to consume the current entry to see
+ * the next one.
+ */
+ ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+ event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
+ NULL);
+ }
- if (!event)
- return NULL;
+ if (!event)
+ return NULL;
+
+ next = ring_buffer_event_data(event);
- next = ring_buffer_event_data(event);
+ if (data) {
+ /*
+ * Save current and next entries for later reference
+ * if the output fails.
+ */
+ data->ent = *curr;
+ data->ret = *next;
+ }
+ }
if (next->ent.type != TRACE_GRAPH_RET)
return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
if (data) {
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
/*
* Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
if (data) {
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
*depth = call->depth;
}
@@ -782,19 +816,34 @@ static enum print_line_t
print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
struct trace_iterator *iter)
{
- int cpu = iter->cpu;
+ struct fgraph_data *data = iter->private;
struct ftrace_graph_ent *call = &field->graph_ent;
struct ftrace_graph_ret_entry *leaf_ret;
+ static enum print_line_t ret;
+ int cpu = iter->cpu;
if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
return TRACE_TYPE_PARTIAL_LINE;
leaf_ret = get_return_for_leaf(iter, field);
if (leaf_ret)
- return print_graph_entry_leaf(iter, field, leaf_ret, s);
+ ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
else
- return print_graph_entry_nested(iter, field, s, cpu);
+ ret = print_graph_entry_nested(iter, field, s, cpu);
+ if (data) {
+ /*
+ * If we failed to write our output, then we need to make
+ * note of it. Because we already consumed our entry.
+ */
+ if (s->full) {
+ data->failed = 1;
+ data->cpu = cpu;
+ } else
+ data->failed = 0;
+ }
+
+ return ret;
}
static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
if (data) {
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
/*
* Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
int i;
if (data)
- depth = per_cpu_ptr(data, iter->cpu)->depth;
+ depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
if (print_graph_prologue(iter, s, 0, 0))
return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
enum print_line_t
print_graph_function(struct trace_iterator *iter)
{
+ struct ftrace_graph_ent_entry *field;
+ struct fgraph_data *data = iter->private;
struct trace_entry *entry = iter->ent;
struct trace_seq *s = &iter->seq;
+ int cpu = iter->cpu;
+ int ret;
+
+ if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
+ per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
+ return TRACE_TYPE_HANDLED;
+ }
+
+ /*
+ * If the last output failed, there's a possibility we need
+ * to print out the missing entry which would never go out.
+ */
+ if (data && data->failed) {
+ field = &data->ent;
+ iter->cpu = data->cpu;
+ ret = print_graph_entry(field, s, iter);
+ if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
+ per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
+ ret = TRACE_TYPE_NO_CONSUME;
+ }
+ iter->cpu = cpu;
+ return ret;
+ }
switch (entry->type) {
case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
* sizeof(struct ftrace_graph_ent_entry) is very small,
* it can be safely saved at the stack.
*/
- struct ftrace_graph_ent_entry *field, saved;
+ struct ftrace_graph_ent_entry saved;
trace_assign_type(field, entry);
saved = *field;
return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
static void graph_trace_open(struct trace_iterator *iter)
{
/* pid and depth on the last trace processed */
- struct fgraph_data *data = alloc_percpu(struct fgraph_data);
+ struct fgraph_data *data;
int cpu;
+ iter->private = NULL;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
- pr_warning("function graph tracer: not enough memory\n");
- else
- for_each_possible_cpu(cpu) {
- pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
- int *depth = &(per_cpu_ptr(data, cpu)->depth);
- *pid = -1;
- *depth = 0;
- }
+ goto out_err;
+
+ data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+ if (!data->cpu_data)
+ goto out_err_free;
+
+ for_each_possible_cpu(cpu) {
+ pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
+ int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+ int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
+ *pid = -1;
+ *depth = 0;
+ *ignore = 0;
+ }
iter->private = data;
+
+ return;
+
+ out_err_free:
+ kfree(data);
+ out_err:
+ pr_warning("function graph tracer: not enough memory\n");
}
static void graph_trace_close(struct trace_iterator *iter)
{
- free_percpu(iter->private);
+ struct fgraph_data *data = iter->private;
+
+ if (data) {
+ free_percpu(data->cpu_data);
+ kfree(data);
+ }
}
static struct tracer graph_trace __read_mostly = {
.name = "function_graph",
.open = graph_trace_open,
+ .pipe_open = graph_trace_open,
.close = graph_trace_close,
+ .pipe_close = graph_trace_close,
.wait_pipe = poll_wait_pipe,
.init = graph_trace_init,
.reset = graph_trace_reset,
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 69543a905cd5..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -20,10 +20,10 @@
#define BTS_BUFFER_SIZE (1 << 13)
-static DEFINE_PER_CPU(struct bts_tracer *, tracer);
-static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
+static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
+static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
-#define this_tracer per_cpu(tracer, smp_processor_id())
+#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
static int trace_hw_branches_enabled __read_mostly;
static int trace_hw_branches_suspended __read_mostly;
@@ -32,12 +32,13 @@ static struct trace_array *hw_branch_trace __read_mostly;
static void bts_trace_init_cpu(int cpu)
{
- per_cpu(tracer, cpu) =
- ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
- NULL, (size_t)-1, BTS_KERNEL);
+ per_cpu(hwb_tracer, cpu) =
+ ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
+ BTS_BUFFER_SIZE, NULL, (size_t)-1,
+ BTS_KERNEL);
- if (IS_ERR(per_cpu(tracer, cpu)))
- per_cpu(tracer, cpu) = NULL;
+ if (IS_ERR(per_cpu(hwb_tracer, cpu)))
+ per_cpu(hwb_tracer, cpu) = NULL;
}
static int bts_trace_init(struct trace_array *tr)
@@ -51,7 +52,7 @@ static int bts_trace_init(struct trace_array *tr)
for_each_online_cpu(cpu) {
bts_trace_init_cpu(cpu);
- if (likely(per_cpu(tracer, cpu)))
+ if (likely(per_cpu(hwb_tracer, cpu)))
trace_hw_branches_enabled = 1;
}
trace_hw_branches_suspended = 0;
@@ -67,9 +68,9 @@ static void bts_trace_reset(struct trace_array *tr)
get_online_cpus();
for_each_online_cpu(cpu) {
- if (likely(per_cpu(tracer, cpu))) {
- ds_release_bts(per_cpu(tracer, cpu));
- per_cpu(tracer, cpu) = NULL;
+ if (likely(per_cpu(hwb_tracer, cpu))) {
+ ds_release_bts(per_cpu(hwb_tracer, cpu));
+ per_cpu(hwb_tracer, cpu) = NULL;
}
}
trace_hw_branches_enabled = 0;
@@ -83,8 +84,8 @@ static void bts_trace_start(struct trace_array *tr)
get_online_cpus();
for_each_online_cpu(cpu)
- if (likely(per_cpu(tracer, cpu)))
- ds_resume_bts(per_cpu(tracer, cpu));
+ if (likely(per_cpu(hwb_tracer, cpu)))
+ ds_resume_bts(per_cpu(hwb_tracer, cpu));
trace_hw_branches_suspended = 0;
put_online_cpus();
}
@@ -95,8 +96,8 @@ static void bts_trace_stop(struct trace_array *tr)
get_online_cpus();
for_each_online_cpu(cpu)
- if (likely(per_cpu(tracer, cpu)))
- ds_suspend_bts(per_cpu(tracer, cpu));
+ if (likely(per_cpu(hwb_tracer, cpu)))
+ ds_suspend_bts(per_cpu(hwb_tracer, cpu));
trace_hw_branches_suspended = 1;
put_online_cpus();
}
@@ -114,16 +115,16 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
bts_trace_init_cpu(cpu);
if (trace_hw_branches_suspended &&
- likely(per_cpu(tracer, cpu)))
- ds_suspend_bts(per_cpu(tracer, cpu));
+ likely(per_cpu(hwb_tracer, cpu)))
+ ds_suspend_bts(per_cpu(hwb_tracer, cpu));
}
break;
case CPU_DOWN_PREPARE:
/* The notification is sent with interrupts enabled. */
- if (likely(per_cpu(tracer, cpu))) {
- ds_release_bts(per_cpu(tracer, cpu));
- per_cpu(tracer, cpu) = NULL;
+ if (likely(per_cpu(hwb_tracer, cpu))) {
+ ds_release_bts(per_cpu(hwb_tracer, cpu));
+ per_cpu(hwb_tracer, cpu) = NULL;
}
}
@@ -258,8 +259,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
get_online_cpus();
for_each_online_cpu(cpu)
- if (likely(per_cpu(tracer, cpu)))
- ds_suspend_bts(per_cpu(tracer, cpu));
+ if (likely(per_cpu(hwb_tracer, cpu)))
+ ds_suspend_bts(per_cpu(hwb_tracer, cpu));
/*
* We need to collect the trace on the respective cpu since ftrace
* implicitly adds the record for the current cpu.
@@ -268,8 +269,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
on_each_cpu(trace_bts_cpu, iter->tr, 1);
for_each_online_cpu(cpu)
- if (likely(per_cpu(tracer, cpu)))
- ds_resume_bts(per_cpu(tracer, cpu));
+ if (likely(per_cpu(hwb_tracer, cpu)))
+ ds_resume_bts(per_cpu(hwb_tracer, cpu));
put_online_cpus();
}
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
goto out_unlock;
trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
+ /* Skip 5 functions to get to the irq/preempt enable function */
+ __trace_stack(tr, flags, 5, pc);
if (data->critical_sequence != max_sequence)
goto out_unlock;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aff5f80b59b8..7ecab06547a5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv)
*/
struct trace_probe *tp;
int i, ret = 0;
- int is_return = 0;
+ int is_return = 0, is_delete = 0;
char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
- if (argc < 2) {
- pr_info("Probe point is not specified.\n");
- return -EINVAL;
- }
-
+ /* argc must be >= 1 */
if (argv[0][0] == 'p')
is_return = 0;
else if (argv[0][0] == 'r')
is_return = 1;
+ else if (argv[0][0] == '-')
+ is_delete = 1;
else {
- pr_info("Probe definition must be started with 'p' or 'r'.\n");
+ pr_info("Probe definition must be started with 'p', 'r' or"
+ " '-'.\n");
return -EINVAL;
}
@@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv)
return -EINVAL;
}
}
+ if (!group)
+ group = KPROBE_EVENT_SYSTEM;
+ if (is_delete) {
+ if (!event) {
+ pr_info("Delete command needs an event name.\n");
+ return -EINVAL;
+ }
+ tp = find_probe_event(event, group);
+ if (!tp) {
+ pr_info("Event %s/%s doesn't exist.\n", group, event);
+ return -ENOENT;
+ }
+ /* delete an event */
+ unregister_trace_probe(tp);
+ free_trace_probe(tp);
+ return 0;
+ }
+
+ if (argc < 2) {
+ pr_info("Probe point is not specified.\n");
+ return -EINVAL;
+ }
if (isdigit(argv[1][0])) {
if (is_return) {
pr_info("Return probe point must be a symbol.\n");
@@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv)
argc -= 2; argv += 2;
/* setup a probe */
- if (!group)
- group = KPROBE_EVENT_SYSTEM;
if (!event) {
/* Make a new event name */
if (symbol)
@@ -1113,10 +1132,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
struct kprobe_trace_entry field;
struct trace_probe *tp = (struct trace_probe *)event_call->data;
- ret = trace_define_common_fields(event_call);
- if (!ret)
- return ret;
-
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
/* Set argument names as fields */
@@ -1131,10 +1146,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
struct kretprobe_trace_entry field;
struct trace_probe *tp = (struct trace_probe *)event_call->data;
- ret = trace_define_common_fields(event_call);
- if (!ret)
- return ret;
-
DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
@@ -1434,7 +1445,6 @@ static int register_probe_event(struct trace_probe *tp)
call->unregfunc = probe_event_disable;
#ifdef CONFIG_EVENT_PROFILE
- atomic_set(&call->profile_count, -1);
call->profile_enable = probe_profile_enable;
call->profile_disable = probe_profile_disable;
#endif
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index ddfa0fd43bc0..faf37fa4408c 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
}
#endif /* CONFIG_PROFILE_KSYM_TRACER */
-void ksym_hbp_handler(struct perf_event *hbp, void *data)
+void ksym_hbp_handler(struct perf_event *hbp, int nmi,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
struct ring_buffer_event *event;
struct ksym_trace_entry *entry;
- struct pt_regs *regs = data;
struct ring_buffer *buffer;
int pc;
@@ -235,7 +236,8 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
mutex_lock(&ksym_tracer_mutex);
hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
- ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
+ ret = trace_seq_printf(s, "%pS:",
+ (void *)(unsigned long)entry->attr.bp_addr);
if (entry->attr.bp_type == HW_BREAKPOINT_R)
ret = trace_seq_puts(s, "r--\n");
else if (entry->attr.bp_type == HW_BREAKPOINT_W)
@@ -277,21 +279,20 @@ static ssize_t ksym_trace_filter_write(struct file *file,
{
struct trace_ksym *entry;
struct hlist_node *node;
- char *input_string, *ksymname = NULL;
+ char *buf, *input_string, *ksymname = NULL;
unsigned long ksym_addr = 0;
int ret, op, changed = 0;
- input_string = kzalloc(count + 1, GFP_KERNEL);
- if (!input_string)
+ buf = kzalloc(count + 1, GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
- if (copy_from_user(input_string, buffer, count)) {
- kfree(input_string);
- return -EFAULT;
- }
- input_string[count] = '\0';
+ ret = -EFAULT;
+ if (copy_from_user(buf, buffer, count))
+ goto out;
- strstrip(input_string);
+ buf[count] = '\0';
+ input_string = strstrip(buf);
/*
* Clear all breakpoints if:
@@ -299,18 +300,16 @@ static ssize_t ksym_trace_filter_write(struct file *file,
* 2: echo 0 > ksym_trace_filter
* 3: echo "*:---" > ksym_trace_filter
*/
- if (!input_string[0] || !strcmp(input_string, "0") ||
- !strcmp(input_string, "*:---")) {
+ if (!buf[0] || !strcmp(buf, "0") ||
+ !strcmp(buf, "*:---")) {
__ksym_trace_reset();
- kfree(input_string);
- return count;
+ ret = 0;
+ goto out;
}
ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
- if (ret < 0) {
- kfree(input_string);
- return ret;
- }
+ if (ret < 0)
+ goto out;
mutex_lock(&ksym_tracer_mutex);
@@ -321,7 +320,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
if (entry->attr.bp_type != op)
changed = 1;
else
- goto out;
+ goto out_unlock;
break;
}
}
@@ -336,28 +335,24 @@ static ssize_t ksym_trace_filter_write(struct file *file,
if (IS_ERR(entry->ksym_hbp))
ret = PTR_ERR(entry->ksym_hbp);
else
- goto out;
+ goto out_unlock;
}
/* Error or "symbol:---" case: drop it */
ksym_filter_entry_count--;
hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu();
kfree(entry);
- goto out;
+ goto out_unlock;
} else {
/* Check for malformed request: (4) */
- if (op == 0)
- goto out;
- ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+ if (op)
+ ret = process_new_ksym_entry(ksymname, op, ksym_addr);
}
-out:
+out_unlock:
mutex_unlock(&ksym_tracer_mutex);
-
- kfree(input_string);
-
- if (!ret)
- ret = count;
- return ret;
+out:
+ kfree(buf);
+ return !ret ? count : ret;
}
static const struct file_operations ksym_tracing_fops = {
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
static int next_event_type = __TRACE_LAST_TYPE + 1;
-void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+int trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+ int ret;
+
+ ret = seq_write(m, s->buffer, len);
- seq_write(m, s->buffer, len);
+ /*
+ * Only reset this buffer if we successfully wrote to the
+ * seq_file buffer.
+ */
+ if (!ret)
+ trace_seq_init(s);
- trace_seq_init(s);
+ return ret;
}
enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
va_list ap;
int ret;
- if (!len)
+ if (s->full || !len)
return 0;
va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
va_end(ap);
/* If we can't write it all, don't bother writing anything */
- if (ret >= len)
+ if (ret >= len) {
+ s->full = 1;
return 0;
+ }
s->len += ret;
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
int len = (PAGE_SIZE - 1) - s->len;
int ret;
- if (!len)
+ if (s->full || !len)
return 0;
ret = vsnprintf(s->buffer + s->len, len, fmt, args);
/* If we can't write it all, don't bother writing anything */
- if (ret >= len)
+ if (ret >= len) {
+ s->full = 1;
return 0;
+ }
s->len += ret;
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
int len = (PAGE_SIZE - 1) - s->len;
int ret;
- if (!len)
+ if (s->full || !len)
return 0;
ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
/* If we can't write it all, don't bother writing anything */
- if (ret >= len)
+ if (ret >= len) {
+ s->full = 1;
return 0;
+ }
s->len += ret;
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
{
int len = strlen(str);
- if (len > ((PAGE_SIZE - 1) - s->len))
+ if (s->full)
+ return 0;
+
+ if (len > ((PAGE_SIZE - 1) - s->len)) {
+ s->full = 1;
return 0;
+ }
memcpy(s->buffer + s->len, str, len);
s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
int trace_seq_putc(struct trace_seq *s, unsigned char c)
{
- if (s->len >= (PAGE_SIZE - 1))
+ if (s->full)
return 0;
+ if (s->len >= (PAGE_SIZE - 1)) {
+ s->full = 1;
+ return 0;
+ }
+
s->buffer[s->len++] = c;
return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
{
- if (len > ((PAGE_SIZE - 1) - s->len))
+ if (s->full)
return 0;
+ if (len > ((PAGE_SIZE - 1) - s->len)) {
+ s->full = 1;
+ return 0;
+ }
+
memcpy(s->buffer + s->len, mem, len);
s->len += len;
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
const unsigned char *data = mem;
int i, j;
+ if (s->full)
+ return 0;
+
#ifdef __BIG_ENDIAN
for (i = 0, j = 0; i < len; i++) {
#else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
{
void *ret;
- if (len > ((PAGE_SIZE - 1) - s->len))
+ if (s->full)
+ return 0;
+
+ if (len > ((PAGE_SIZE - 1) - s->len)) {
+ s->full = 1;
return NULL;
+ }
ret = s->buffer + s->len;
s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
{
unsigned char *p;
- if (s->len >= (PAGE_SIZE - 1))
+ if (s->full)
+ return 0;
+
+ if (s->len >= (PAGE_SIZE - 1)) {
+ s->full = 1;
return 0;
+ }
+
p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
if (!IS_ERR(p)) {
p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
return 1;
}
+ s->full = 1;
return 0;
}
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
unsigned long vmstart = 0;
int ret = 1;
+ if (s->full)
+ return 0;
+
if (mm) {
const struct vm_area_struct *vma;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
static unsigned wakeup_prio = -1;
static int wakeup_rt;
-static raw_spinlock_t wakeup_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t wakeup_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static void __wakeup_reset(struct trace_array *tr);
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
goto out;
local_irq_save(flags);
- __raw_spin_lock(&wakeup_lock);
+ arch_spin_lock(&wakeup_lock);
/* We could race with grabbing wakeup_lock */
if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
out_unlock:
__wakeup_reset(wakeup_trace);
- __raw_spin_unlock(&wakeup_lock);
+ arch_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
out:
atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
tracing_reset_online_cpus(tr);
local_irq_save(flags);
- __raw_spin_lock(&wakeup_lock);
+ arch_spin_lock(&wakeup_lock);
__wakeup_reset(tr);
- __raw_spin_unlock(&wakeup_lock);
+ arch_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
}
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
goto out;
/* interrupts should be off from try_to_wake_up */
- __raw_spin_lock(&wakeup_lock);
+ arch_spin_lock(&wakeup_lock);
/* check for races. */
if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked:
- __raw_spin_unlock(&wakeup_lock);
+ arch_spin_unlock(&wakeup_lock);
out:
atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index dc98309e839a..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
/* Don't allow flipping of max traces now */
local_irq_save(flags);
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
cnt = ring_buffer_entries(tr->buffer);
@@ -85,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
break;
}
tracing_on();
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
local_irq_restore(flags);
if (count)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..678a5120ee30 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
};
static unsigned long max_stack_size;
-static raw_spinlock_t max_stack_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t max_stack_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static int stack_trace_disabled __read_mostly;
static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
return;
local_irq_save(flags);
- __raw_spin_lock(&max_stack_lock);
+ arch_spin_lock(&max_stack_lock);
/* a race could have already updated it */
if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
}
out:
- __raw_spin_unlock(&max_stack_lock);
+ arch_spin_unlock(&max_stack_lock);
local_irq_restore(flags);
}
@@ -171,9 +171,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
return ret;
local_irq_save(flags);
- __raw_spin_lock(&max_stack_lock);
+ arch_spin_lock(&max_stack_lock);
*ptr = val;
- __raw_spin_unlock(&max_stack_lock);
+ arch_spin_unlock(&max_stack_lock);
local_irq_restore(flags);
return count;
@@ -207,7 +207,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
static void *t_start(struct seq_file *m, loff_t *pos)
{
local_irq_disable();
- __raw_spin_lock(&max_stack_lock);
+ arch_spin_lock(&max_stack_lock);
if (*pos == 0)
return SEQ_START_TOKEN;
@@ -217,7 +217,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
static void t_stop(struct seq_file *m, void *p)
{
- __raw_spin_unlock(&max_stack_lock);
+ arch_spin_unlock(&max_stack_lock);
local_irq_enable();
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 57501d90096a..75289f372dd2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
int i;
int offset = offsetof(typeof(trace), args);
- ret = trace_define_common_fields(call);
- if (ret)
- return ret;
-
ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
if (ret)
return ret;
@@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
struct syscall_trace_exit trace;
int ret;
- ret = trace_define_common_fields(call);
- if (ret)
- return ret;
-
ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
if (ret)
return ret;
@@ -333,10 +325,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_enter)
ret = register_trace_sys_enter(ftrace_syscall_enter);
- if (ret) {
- pr_info("event trace: Could not activate"
- "syscall entry trace point");
- } else {
+ if (!ret) {
set_bit(num, enabled_enter_syscalls);
sys_refcount_enter++;
}
@@ -370,10 +359,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_exit)
ret = register_trace_sys_exit(ftrace_syscall_exit);
- if (ret) {
- pr_info("event trace: Could not activate"
- "syscall exit trace point");
- } else {
+ if (!ret) {
set_bit(num, enabled_exit_syscalls);
sys_refcount_exit++;
}
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c
new file mode 100644
index 000000000000..eb27fd3430a2
--- /dev/null
+++ b/kernel/user-return-notifier.c
@@ -0,0 +1,44 @@
+
+#include <linux/user-return-notifier.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
+
+/*
+ * Request a notification when the current cpu returns to userspace. Must be
+ * called in atomic context. The notifier will also be called in atomic
+ * context.
+ *
+ * Sets TIF_USER_RETURN_NOTIFY on the current task and links @urn at the
+ * head of this cpu's return_notifier_list.
+ */
+void user_return_notifier_register(struct user_return_notifier *urn)
+{
+ set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+ hlist_add_head(&urn->link, &__get_cpu_var(return_notifier_list));
+}
+EXPORT_SYMBOL_GPL(user_return_notifier_register);
+
+/*
+ * Removes a registered user return notifier. Must be called from atomic
+ * context, and from the same cpu registration occurred in.
+ *
+ * Unlinks @urn, then clears TIF_USER_RETURN_NOTIFY on the current task if
+ * this cpu's return_notifier_list has become empty.
+ */
+void user_return_notifier_unregister(struct user_return_notifier *urn)
+{
+ hlist_del(&urn->link);
+ if (hlist_empty(&__get_cpu_var(return_notifier_list)))
+ clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+}
+EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
+
+/*
+ * Calls registered user return notifiers.
+ *
+ * Walks this cpu's return_notifier_list, obtained with get_cpu_var() and
+ * released with put_cpu_var() once the walk is done, and invokes each
+ * entry's on_user_return() callback with the entry itself as argument.
+ * The _safe list iterator is used; presumably so that a callback may
+ * unregister its own notifier during the walk (TODO confirm with callers).
+ */
+void fire_user_return_notifiers(void)
+{
+ struct user_return_notifier *urn;
+ struct hlist_node *tmp1, *tmp2;
+ struct hlist_head *head;
+
+ head = &get_cpu_var(return_notifier_list);
+ hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link)
+ urn->on_user_return(urn);
+ put_cpu_var(return_notifier_list);
+}
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 69eae358a726..a2cd77e70d4d 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -57,78 +57,47 @@ static int proc_do_uts_string(ctl_table *table, int write,
#define proc_do_uts_string NULL
#endif
-
-#ifdef CONFIG_SYSCTL_SYSCALL
-/* The generic string strategy routine: */
-static int sysctl_uts_string(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- struct ctl_table uts_table;
- int r, write;
- write = newval && newlen;
- memcpy(&uts_table, table, sizeof(uts_table));
- uts_table.data = get_uts(table, write);
- r = sysctl_string(&uts_table, oldval, oldlenp, newval, newlen);
- put_uts(table, write, uts_table.data);
- return r;
-}
-#else
-#define sysctl_uts_string NULL
-#endif
-
static struct ctl_table uts_kern_table[] = {
{
- .ctl_name = KERN_OSTYPE,
.procname = "ostype",
.data = init_uts_ns.name.sysname,
.maxlen = sizeof(init_uts_ns.name.sysname),
.mode = 0444,
.proc_handler = proc_do_uts_string,
- .strategy = sysctl_uts_string,
},
{
- .ctl_name = KERN_OSRELEASE,
.procname = "osrelease",
.data = init_uts_ns.name.release,
.maxlen = sizeof(init_uts_ns.name.release),
.mode = 0444,
.proc_handler = proc_do_uts_string,
- .strategy = sysctl_uts_string,
},
{
- .ctl_name = KERN_VERSION,
.procname = "version",
.data = init_uts_ns.name.version,
.maxlen = sizeof(init_uts_ns.name.version),
.mode = 0444,
.proc_handler = proc_do_uts_string,
- .strategy = sysctl_uts_string,
},
{
- .ctl_name = KERN_NODENAME,
.procname = "hostname",
.data = init_uts_ns.name.nodename,
.maxlen = sizeof(init_uts_ns.name.nodename),
.mode = 0644,
.proc_handler = proc_do_uts_string,
- .strategy = sysctl_uts_string,
},
{
- .ctl_name = KERN_DOMAINNAME,
.procname = "domainname",
.data = init_uts_ns.name.domainname,
.maxlen = sizeof(init_uts_ns.name.domainname),
.mode = 0644,
.proc_handler = proc_do_uts_string,
- .strategy = sysctl_uts_string,
},
{}
};
static struct ctl_table uts_root_table[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = uts_kern_table,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 67e526b6ae81..dee48658805c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,116 @@ struct workqueue_struct {
#endif
};
+#ifdef CONFIG_DEBUG_OBJECTS_WORK
+
+static struct debug_obj_descr work_debug_descr;
+
+/*
+ * fixup_init is called when:
+ * - an active object is initialized
+ *
+ * For ODEBUG_STATE_ACTIVE the work is cancelled with cancel_work_sync() and
+ * its debug object is initialized again, then 1 is returned. Any other
+ * state is left untouched and 0 is returned.
+ */
+static int work_fixup_init(void *addr, enum debug_obj_state state)
+{
+ struct work_struct *work = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_ACTIVE:
+ cancel_work_sync(work);
+ debug_object_init(work, &work_debug_descr);
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * fixup_activate is called when:
+ * - an active object is activated
+ * - an unknown object is activated (might be a statically initialized object)
+ *
+ * ODEBUG_STATE_NOTAVAILABLE: when WORK_STRUCT_STATIC is set in the work's
+ * data bits the object is registered with the tracker and activated;
+ * otherwise a one-time warning is issued.
+ * ODEBUG_STATE_ACTIVE: a warning is issued.
+ * Every path returns 0.
+ */
+static int work_fixup_activate(void *addr, enum debug_obj_state state)
+{
+ struct work_struct *work = addr;
+
+ switch (state) {
+
+ case ODEBUG_STATE_NOTAVAILABLE:
+ /*
+ * This is not really a fixup. The work struct was
+ * statically initialized. We just make sure that it
+ * is tracked in the object tracker.
+ */
+ if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) {
+ debug_object_init(work, &work_debug_descr);
+ debug_object_activate(work, &work_debug_descr);
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return 0;
+
+ case ODEBUG_STATE_ACTIVE:
+ WARN_ON(1);
+ /* fall through: after warning, share the default "return 0" */
+ default:
+ return 0;
+ }
+}
+
+/*
+ * fixup_free is called when:
+ * - an active object is freed
+ *
+ * For ODEBUG_STATE_ACTIVE the work is cancelled with cancel_work_sync() and
+ * its debug object is freed, then 1 is returned. Any other state is left
+ * untouched and 0 is returned.
+ */
+static int work_fixup_free(void *addr, enum debug_obj_state state)
+{
+ struct work_struct *work = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_ACTIVE:
+ cancel_work_sync(work);
+ debug_object_free(work, &work_debug_descr);
+ return 1;
+ default:
+ return 0;
+ }
+}
+/*
+ * Debug object descriptor for work_struct: names the object type and wires
+ * up the init, activate and free fixup callbacks of this file.
+ */
+static struct debug_obj_descr work_debug_descr = {
+ .name = "work_struct",
+ .fixup_init = work_fixup_init,
+ .fixup_activate = work_fixup_activate,
+ .fixup_free = work_fixup_free,
+};
+/* Reports @work to the debug object tracker as activated. */
+static inline void debug_work_activate(struct work_struct *work)
+{
+ debug_object_activate(work, &work_debug_descr);
+}
+/* Reports @work to the debug object tracker as deactivated. */
+static inline void debug_work_deactivate(struct work_struct *work)
+{
+ debug_object_deactivate(work, &work_debug_descr);
+}
+/*
+ * Initializes debug object tracking for @work, using the on-stack variant
+ * of the init call when @onstack is non-zero and the regular one otherwise.
+ */
+void __init_work(struct work_struct *work, int onstack)
+{
+ if (onstack)
+ debug_object_init_on_stack(work, &work_debug_descr);
+ else
+ debug_object_init(work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(__init_work);
+/* Frees the debug object that tracks @work. */
+void destroy_work_on_stack(struct work_struct *work)
+{
+ debug_object_free(work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_work_on_stack);
+
+#else /* !CONFIG_DEBUG_OBJECTS_WORK: tracking hooks compile to nothing */
+static inline void debug_work_activate(struct work_struct *work) { }
+static inline void debug_work_deactivate(struct work_struct *work) { }
+#endif /* CONFIG_DEBUG_OBJECTS_WORK */
+
/* Serializes the accesses to the list of workqueues. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);
@@ -145,6 +255,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
{
unsigned long flags;
+ debug_work_activate(work);
spin_lock_irqsave(&cwq->lock, flags);
insert_work(cwq, work, &cwq->worklist);
spin_unlock_irqrestore(&cwq->lock, flags);
@@ -280,6 +391,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
struct lockdep_map lockdep_map = work->lockdep_map;
#endif
trace_workqueue_execution(cwq->thread, work);
+ debug_work_deactivate(work);
cwq->current_work = work;
list_del_init(cwq->worklist.next);
spin_unlock_irq(&cwq->lock);
@@ -350,11 +462,18 @@ static void wq_barrier_func(struct work_struct *work)
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
struct wq_barrier *barr, struct list_head *head)
{
- INIT_WORK(&barr->work, wq_barrier_func);
+ /*
+ * debugobject calls are safe here even with cwq->lock locked
+ * as we know for sure that this will not trigger any of the
+ * checks and call back into the fixup functions where we
+ * might deadlock.
+ */
+ INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
init_completion(&barr->done);
+ debug_work_activate(&barr->work);
insert_work(cwq, &barr->work, head);
}
@@ -372,8 +491,10 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
}
spin_unlock_irq(&cwq->lock);
- if (active)
+ if (active) {
wait_for_completion(&barr.done);
+ destroy_work_on_stack(&barr.work);
+ }
return active;
}
@@ -451,6 +572,7 @@ out:
return 0;
wait_for_completion(&barr.done);
+ destroy_work_on_stack(&barr.work);
return 1;
}
EXPORT_SYMBOL_GPL(flush_work);
@@ -485,6 +607,7 @@ static int try_to_grab_pending(struct work_struct *work)
*/
smp_rmb();
if (cwq == get_wq_data(work)) {
+ debug_work_deactivate(work);
list_del_init(&work->entry);
ret = 1;
}
@@ -507,8 +630,10 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
}
spin_unlock_irq(&cwq->lock);
- if (unlikely(running))
+ if (unlikely(running)) {
wait_for_completion(&barr.done);
+ destroy_work_on_stack(&barr.work);
+ }
}
static void wait_on_work(struct work_struct *work)