diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Kconfig.locks | 4 | ||||
-rw-r--r-- | kernel/Kconfig.preempt | 1 | ||||
-rw-r--r-- | kernel/cgroup.c | 2 | ||||
-rw-r--r-- | kernel/compat.c | 68 | ||||
-rw-r--r-- | kernel/cpuset.c | 21 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/futex.c | 38 | ||||
-rw-r--r-- | kernel/futex_compat.c | 38 | ||||
-rw-r--r-- | kernel/irq/Kconfig | 15 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 8 | ||||
-rw-r--r-- | kernel/sched/core.c | 62 | ||||
-rw-r--r-- | kernel/sched/fair.c | 16 | ||||
-rw-r--r-- | kernel/sched/rt.c | 2 | ||||
-rw-r--r-- | kernel/spinlock.c | 2 | ||||
-rw-r--r-- | kernel/time.c | 6 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 8 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 2 | ||||
-rw-r--r-- | kernel/time/ntp.c | 134 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 51 |
19 files changed, 248 insertions, 232 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 5068e2a4e75f..2251882daf53 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -124,8 +124,8 @@ config INLINE_SPIN_LOCK_IRQSAVE def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ ARCH_INLINE_SPIN_LOCK_IRQSAVE -config INLINE_SPIN_UNLOCK - def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK) +config UNINLINE_SPIN_UNLOCK + bool config INLINE_SPIN_UNLOCK_BH def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 24e7cb0ba26a..3f9c97419f02 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY config PREEMPT bool "Preemptible Kernel (Low-Latency Desktop)" select PREEMPT_COUNT + select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK help This option reduces the latency of the kernel by making all kernel code (that is not executing in a critical section) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f4ea4b6f3cf1..ed64ccac67c9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1883,7 +1883,7 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, */ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { - int retval; + int retval = 0; struct cgroup_subsys *ss, *failed_ss = NULL; struct cgroup *oldcgrp; struct cgroupfs_root *root = cgrp->root; diff --git a/kernel/compat.c b/kernel/compat.c index f346cedfe24d..74ff8498809a 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -31,11 +31,10 @@ #include <asm/uaccess.h> /* - * Note that the native side is already converted to a timespec, because - * that's what we want anyway. + * Get/set struct timeval with struct timespec on the native side */ -static int compat_get_timeval(struct timespec *o, - struct compat_timeval __user *i) +static int compat_get_timeval_convert(struct timespec *o, + struct compat_timeval __user *i) { long usec; @@ -46,8 +45,8 @@ static int compat_get_timeval(struct timespec *o, return 0; } -static int compat_put_timeval(struct compat_timeval __user *o, - struct timeval *i) +static int compat_put_timeval_convert(struct compat_timeval __user *o, + struct timeval *i) { return (put_user(i->tv_sec, &o->tv_sec) || put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0; @@ -117,7 +116,7 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, if (tv) { struct timeval ktv; do_gettimeofday(&ktv); - if (compat_put_timeval(tv, &ktv)) + if (compat_put_timeval_convert(tv, &ktv)) return -EFAULT; } if (tz) { @@ -135,7 +134,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, struct timezone ktz; if (tv) { - if (compat_get_timeval(&kts, tv)) + if (compat_get_timeval_convert(&kts, tv)) return -EFAULT; } if (tz) { @@ -146,12 +145,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } +int get_compat_timeval(struct timeval *tv, const struct compat_timeval __user *ctv) +{ + return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) || + __get_user(tv->tv_sec, &ctv->tv_sec) || + __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; +} +EXPORT_SYMBOL_GPL(get_compat_timeval); + +int put_compat_timeval(const struct timeval *tv, struct compat_timeval __user *ctv) +{ + return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) || + __put_user(tv->tv_sec, &ctv->tv_sec) || + __put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; +} +EXPORT_SYMBOL_GPL(put_compat_timeval); + int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) { return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || __get_user(ts->tv_sec, &cts->tv_sec) || __get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; } +EXPORT_SYMBOL_GPL(get_compat_timespec); int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts) { @@ -161,6 +177,42 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user } EXPORT_SYMBOL_GPL(put_compat_timespec); +int compat_get_timeval(struct timeval *tv, const void __user *utv) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0; + else + return get_compat_timeval(tv, utv); +} +EXPORT_SYMBOL_GPL(compat_get_timeval); + +int compat_put_timeval(const struct timeval *tv, void __user *utv) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0; + else + return put_compat_timeval(tv, utv); +} +EXPORT_SYMBOL_GPL(compat_put_timeval); + +int compat_get_timespec(struct timespec *ts, const void __user *uts) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0; + else + return get_compat_timespec(ts, uts); +} +EXPORT_SYMBOL_GPL(compat_get_timespec); + +int compat_put_timespec(const struct timespec *ts, void __user *uts) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0; + else + return put_compat_timespec(ts, uts); +} +EXPORT_SYMBOL_GPL(compat_put_timespec); + static long compat_nanosleep_restart(struct restart_block *restart) { struct compat_timespec __user *rmtp; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1010cc61931f..b96ad75b7e64 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2162,10 +2162,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) mutex_unlock(&callback_mutex); } -int cpuset_cpus_allowed_fallback(struct task_struct *tsk) +void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { const struct cpuset *cs; - int cpu; rcu_read_lock(); cs = task_cs(tsk); @@ -2186,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary * set any mask even if it is not right from task_cs() pov, * the pending set_cpus_allowed_ptr() will fix things. + * + * select_fallback_rq() will fix things ups and set cpu_possible_mask + * if required. */ - - cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask); - if (cpu >= nr_cpu_ids) { - /* - * Either tsk->cpus_allowed is wrong (see above) or it - * is actually empty. The latter case is only possible - * if we are racing with remove_tasks_in_empty_cpuset(). - * Like above we can temporary set any mask and rely on - * set_cpus_allowed_ptr() as synchronization point. - */ - do_set_cpus_allowed(tsk, cpu_possible_mask); - cpu = cpumask_any(cpu_active_mask); - } - - return cpu; } void cpuset_init_current_mems_allowed(void) diff --git a/kernel/exit.c b/kernel/exit.c index 3db1909faed9..d8bd3b425fa7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -474,7 +474,7 @@ static void close_files(struct files_struct * files) i = j * __NFDBITS; if (i >= fdt->max_fds) break; - set = fdt->open_fds->fds_bits[j++]; + set = fdt->open_fds[j++]; while (set) { if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); diff --git a/kernel/futex.c b/kernel/futex.c index 72efa1e4359a..e2b0fb9a0b3b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -59,6 +59,7 @@ #include <linux/magic.h> #include <linux/pid.h> #include <linux/nsproxy.h> +#include <linux/ptrace.h> #include <asm/futex.h> @@ -2443,40 +2444,31 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, { struct robust_list_head __user *head; unsigned long ret; - const struct cred *cred = current_cred(), *pcred; + struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; + WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); + + rcu_read_lock(); + + ret = -ESRCH; if (!pid) - head = current->robust_list; + p = current; else { - struct task_struct *p; - - ret = -ESRCH; - rcu_read_lock(); p = find_task_by_vpid(pid); if (!p) goto err_unlock; - ret = -EPERM; - pcred = __task_cred(p); - /* If victim is in different user_ns, then uids are not - comparable, so we must have CAP_SYS_PTRACE */ - if (cred->user->user_ns != pcred->user->user_ns) { - if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; - goto ok; - } - /* If victim is in same user_ns, then uids are comparable */ - if (cred->euid != pcred->euid && - cred->euid != pcred->uid && - !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; -ok: - head = p->robust_list; - rcu_read_unlock(); } + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ)) + goto err_unlock; + + head = p->robust_list; + rcu_read_unlock(); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 5f9e689dc8f0..83e368b005fc 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -10,6 +10,7 @@ #include <linux/compat.h> #include <linux/nsproxy.h> #include <linux/futex.h> +#include <linux/ptrace.h> #include <asm/uaccess.h> @@ -136,40 +137,31 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, { struct compat_robust_list_head __user *head; unsigned long ret; - const struct cred *cred = current_cred(), *pcred; + struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; + WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); + + rcu_read_lock(); + + ret = -ESRCH; if (!pid) - head = current->compat_robust_list; + p = current; else { - struct task_struct *p; - - ret = -ESRCH; - rcu_read_lock(); p = find_task_by_vpid(pid); if (!p) goto err_unlock; - ret = -EPERM; - pcred = __task_cred(p); - /* If victim is in different user_ns, then uids are not - comparable, so we must have CAP_SYS_PTRACE */ - if (cred->user->user_ns != pcred->user->user_ns) { - if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; - goto ok; - } - /* If victim is in same user_ns, then uids are comparable */ - if (cred->euid != pcred->euid && - cred->euid != pcred->uid && - !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; -ok: - head = p->compat_robust_list; - rcu_read_unlock(); } + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ)) + goto err_unlock; + + head = p->compat_robust_list; + rcu_read_unlock(); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(ptr_to_compat(head), head_ptr); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5a38bf4de641..cf1a4a68ce44 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -13,7 +13,7 @@ config GENERIC_HARDIRQS # Options selectable by the architecture code # Make sparse irq Kconfig switch below available -config HAVE_SPARSE_IRQ +config MAY_HAVE_SPARSE_IRQ bool # Enable the generic irq autoprobe mechanism @@ -56,13 +56,22 @@ config GENERIC_IRQ_CHIP config IRQ_DOMAIN bool +config IRQ_DOMAIN_DEBUG + bool "Expose hardware/virtual IRQ mapping via debugfs" + depends on IRQ_DOMAIN && DEBUG_FS + help + This option will show the mapping relationship between hardware irq + numbers and Linux irq numbers. The mapping is exposed via debugfs + in the file "virq_mapping". + + If you don't know what this means you don't need it. + # Support forced irq threading config IRQ_FORCED_THREADING bool config SPARSE_IRQ - bool "Support sparse irq numbering" - depends on HAVE_SPARSE_IRQ + bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ ---help--- Sparse irq numbering is useful for distro kernels that want diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index af48e59bc2ff..3601f3fbf67c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -632,7 +632,7 @@ unsigned int irq_linear_revmap(struct irq_domain *domain, return revmap[hwirq]; } -#ifdef CONFIG_VIRQ_DEBUG +#ifdef CONFIG_IRQ_DOMAIN_DEBUG static int virq_debug_show(struct seq_file *m, void *private) { unsigned long flags; @@ -668,7 +668,7 @@ static int virq_debug_show(struct seq_file *m, void *private) data = irq_desc_get_chip_data(desc); seq_printf(m, "0x%16p ", data); - if (desc->irq_data.domain->of_node) + if (desc->irq_data.domain && desc->irq_data.domain->of_node) p = desc->irq_data.domain->of_node->full_name; else p = none; @@ -695,14 +695,14 @@ static const struct file_operations virq_debug_fops = { static int __init irq_debugfs_init(void) { - if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root, + if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL, NULL, &virq_debug_fops) == NULL) return -ENOMEM; return 0; } __initcall(irq_debugfs_init); -#endif /* CONFIG_VIRQ_DEBUG */ +#endif /* CONFIG_IRQ_DOMAIN_DEBUG */ int irq_domain_simple_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 157fb9b2b186..e3ed0ecee7c7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1265,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process); */ static int select_fallback_rq(int cpu, struct task_struct *p) { - int dest_cpu; const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); + enum { cpuset, possible, fail } state = cpuset; + int dest_cpu; /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) + for_each_cpu_mask(dest_cpu, *nodemask) { + if (!cpu_online(dest_cpu)) + continue; + if (!cpu_active(dest_cpu)) + continue; if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) return dest_cpu; + } - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); - if (dest_cpu < nr_cpu_ids) - return dest_cpu; + for (;;) { + /* Any allowed, online CPU? */ + for_each_cpu_mask(dest_cpu, *tsk_cpus_allowed(p)) { + if (!cpu_online(dest_cpu)) + continue; + if (!cpu_active(dest_cpu)) + continue; + goto out; + } - /* No more Mr. Nice Guy. */ - dest_cpu = cpuset_cpus_allowed_fallback(p); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk_sched("process %d (%s) no longer affine to cpu%d\n", - task_pid_nr(p), p->comm, cpu); + switch (state) { + case cpuset: + /* No more Mr. Nice Guy. */ + cpuset_cpus_allowed_fallback(p); + state = possible; + break; + + case possible: + do_set_cpus_allowed(p, cpu_possible_mask); + state = fail; + break; + + case fail: + BUG(); + break; + } + } + +out: + if (state != cpuset) { + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk_sched("process %d (%s) no longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); + } } return dest_cpu; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 94340c7544a9..0d97ebdc58f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) #endif /* CONFIG_FAIR_GROUP_SCHED */ -static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec); +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec); /************************************************************** * Scheduling class tree data structure manipulation methods: @@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) __clear_buddies_skip(se); } -static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); +static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, resched_task(rq_of(cfs_rq)->curr); } -static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec) +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) { if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) return; @@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq) } #else /* CONFIG_CFS_BANDWIDTH */ -static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec) {} +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} -static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} +static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) { diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index b60dad720173..44af55e6d5d0 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) next_idx: if (idx >= MAX_RT_PRIO) continue; - if (next && next->prio < idx) + if (next && next->prio <= idx) continue; list_for_each_entry(rt_se, array->queue + idx, run_list) { struct task_struct *p; diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 84c7d96918bf..5cdd8065a3ce 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -163,7 +163,7 @@ void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock) EXPORT_SYMBOL(_raw_spin_lock_bh); #endif -#ifndef CONFIG_INLINE_SPIN_UNLOCK +#ifdef CONFIG_UNINLINE_SPIN_UNLOCK void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) { __raw_spin_unlock(lock); diff --git a/kernel/time.c b/kernel/time.c index 73e416db0a1e..ba744cf80696 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -163,7 +163,6 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) return error; if (tz) { - /* SMP safe, global irq locking makes it work. */ sys_tz = *tz; update_vsyscall_tz(); if (firsttime) { @@ -173,12 +172,7 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) } } if (tv) - { - /* SMP safe, again the code in arch/foo/time.c should - * globally block out interrupts when it runs. - */ return do_settimeofday(tv); - } return 0; } diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 8a46f5d64504..8a538c55fc7b 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -96,6 +96,11 @@ static int alarmtimer_rtc_add_device(struct device *dev, return 0; } +static inline void alarmtimer_rtc_timer_init(void) +{ + rtc_timer_init(&rtctimer, NULL, NULL); +} + static struct class_interface alarmtimer_rtc_interface = { .add_dev = &alarmtimer_rtc_add_device, }; @@ -117,6 +122,7 @@ static inline struct rtc_device *alarmtimer_get_rtcdev(void) #define rtcdev (NULL) static inline int alarmtimer_rtc_interface_setup(void) { return 0; } static inline void alarmtimer_rtc_interface_remove(void) { } +static inline void alarmtimer_rtc_timer_init(void) { } #endif /** @@ -783,6 +789,8 @@ static int __init alarmtimer_init(void) .nsleep = alarm_timer_nsleep, }; + alarmtimer_rtc_timer_init(); + posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index a45ca167ab24..c9583382141a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -500,7 +500,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) { u64 ret; /* - * We won't try to correct for more then 11% adjustments (110,000 ppm), + * We won't try to correct for more than 11% adjustments (110,000 ppm), */ ret = (u64)cs->mult * 11; do_div(ret,100); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 6e039b144daf..f03fd83b170b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -34,8 +34,6 @@ unsigned long tick_nsec; static u64 tick_length; static u64 tick_length_base; -static struct hrtimer leap_timer; - #define MAX_TICKADJ 500LL /* usecs */ #define MAX_TICKADJ_SCALED \ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) @@ -381,70 +379,63 @@ u64 ntp_tick_length(void) /* - * Leap second processing. If in leap-insert state at the end of the - * day, the system clock is set back one second; if in leap-delete - * state, the system clock is set ahead one second. + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + * + * Also handles leap second processing, and returns leap offset */ -static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) +int second_overflow(unsigned long secs) { - enum hrtimer_restart res = HRTIMER_NORESTART; - unsigned long flags; + s64 delta; int leap = 0; + unsigned long flags; spin_lock_irqsave(&ntp_lock, flags); + + /* + * Leap second processing. If in leap-insert state at the end of the + * day, the system clock is set back one second; if in leap-delete + * state, the system clock is set ahead one second. + */ switch (time_state) { case TIME_OK: + if (time_status & STA_INS) + time_state = TIME_INS; + else if (time_status & STA_DEL) + time_state = TIME_DEL; break; case TIME_INS: - leap = -1; - time_state = TIME_OOP; - printk(KERN_NOTICE - "Clock: inserting leap second 23:59:60 UTC\n"); - hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); - res = HRTIMER_RESTART; + if (secs % 86400 == 0) { + leap = -1; + time_state = TIME_OOP; + printk(KERN_NOTICE + "Clock: inserting leap second 23:59:60 UTC\n"); + } break; case TIME_DEL: - leap = 1; - time_tai--; - time_state = TIME_WAIT; - printk(KERN_NOTICE - "Clock: deleting leap second 23:59:59 UTC\n"); + if ((secs + 1) % 86400 == 0) { + leap = 1; + time_tai--; + time_state = TIME_WAIT; + printk(KERN_NOTICE + "Clock: deleting leap second 23:59:59 UTC\n"); + } break; case TIME_OOP: time_tai++; time_state = TIME_WAIT; - /* fall through */ + break; + case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; break; } - spin_unlock_irqrestore(&ntp_lock, flags); - /* - * We have to call this outside of the ntp_lock to keep - * the proper locking hierarchy - */ - if (leap) - timekeeping_leap_insert(leap); - - return res; -} - -/* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. - */ -void second_overflow(void) -{ - s64 delta; - unsigned long flags; - - spin_lock_irqsave(&ntp_lock, flags); /* Bump the maxerror field */ time_maxerror += MAXFREQ / NSEC_PER_USEC; @@ -481,15 +472,17 @@ void second_overflow(void) tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; time_adjust = 0; + + + out: spin_unlock_irqrestore(&ntp_lock, flags); + + return leap; } #ifdef CONFIG_GENERIC_CMOS_UPDATE -/* Disable the cmos update - used by virtualization and embedded */ -int no_sync_cmos_clock __read_mostly; - static void sync_cmos_clock(struct work_struct *work); static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); @@ -536,35 +529,13 @@ static void sync_cmos_clock(struct work_struct *work) static void notify_cmos_timer(void) { - if (!no_sync_cmos_clock) - schedule_delayed_work(&sync_cmos_work, 0); + schedule_delayed_work(&sync_cmos_work, 0); } #else static inline void notify_cmos_timer(void) { } #endif -/* - * Start the leap seconds timer: - */ -static inline void ntp_start_leap_timer(struct timespec *ts) -{ - long now = ts->tv_sec; - - if (time_status & STA_INS) { - time_state = TIME_INS; - now += 86400 - now % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - - return; - } - - if (time_status & STA_DEL) { - time_state = TIME_DEL; - now += 86400 - (now + 1) % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - } -} /* * Propagate a new txc->status value into the NTP state: @@ -589,22 +560,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) time_status &= STA_RONLY; time_status |= txc->status & ~STA_RONLY; - switch (time_state) { - case TIME_OK: - ntp_start_leap_timer(ts); - break; - case TIME_INS: - case TIME_DEL: - time_state = TIME_OK; - ntp_start_leap_timer(ts); - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - break; - case TIME_OOP: - hrtimer_restart(&leap_timer); - break; - } } /* * Called with the xtime lock held, so we can access and modify @@ -686,9 +641,6 @@ int do_adjtimex(struct timex *txc) (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) return -EINVAL; - - if (txc->modes & ADJ_STATUS && time_state != TIME_OK) - hrtimer_cancel(&leap_timer); } if (txc->modes & ADJ_SETOFFSET) { @@ -1010,6 +962,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); void __init ntp_init(void) { ntp_clear(); - hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - leap_timer.function = ntp_leap_second; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 15be32e19c6e..d66b21308f7c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp) } -void timekeeping_leap_insert(int leapsecond) -{ - unsigned long flags; - - write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeper.xtime.tv_sec += leapsecond; - timekeeper.wall_to_monotonic.tv_sec -= leapsecond; - timekeeping_update(false); - write_sequnlock_irqrestore(&timekeeper.lock, flags); - -} - /** * timekeeping_forward_now - update clock to the current time * @@ -448,9 +436,12 @@ EXPORT_SYMBOL(timekeeping_inject_offset); static int change_clocksource(void *data) { struct clocksource *new, *old; + unsigned long flags; new = (struct clocksource *) data; + write_seqlock_irqsave(&timekeeper.lock, flags); + timekeeping_forward_now(); if (!new->enable || new->enable(new) == 0) { old = timekeeper.clock; @@ -458,6 +449,10 @@ static int change_clocksource(void *data) if (old->disable) old->disable(old); } + timekeeping_update(true); + + write_sequnlock_irqrestore(&timekeeper.lock, flags); + return 0; } @@ -827,7 +822,7 @@ static void timekeeping_adjust(s64 offset) int adj; /* - * The point of this is to check if the error is greater then half + * The point of this is to check if the error is greater than half * an interval. * * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. @@ -835,7 +830,7 @@ static void timekeeping_adjust(s64 offset) * Note we subtract one in the shift, so that error is really error*2. * This "saves" dividing(shifting) interval twice, but keeps the * (error > interval) comparison as still measuring if error is - * larger then half an interval. + * larger than half an interval. * * Note: It does not "save" on aggravation when reading the code. */ @@ -843,7 +838,7 @@ static void timekeeping_adjust(s64 offset) if (error > interval) { /* * We now divide error by 4(via shift), which checks if - * the error is greater then twice the interval. + * the error is greater than twice the interval. * If it is greater, we need a bigadjust, if its smaller, * we can adjust by 1. */ @@ -874,13 +869,15 @@ static void timekeeping_adjust(s64 offset) } else /* No adjustment needed */ return; - WARN_ONCE(timekeeper.clock->maxadj && - (timekeeper.mult + adj > timekeeper.clock->mult + - timekeeper.clock->maxadj), - "Adjusting %s more then 11%% (%ld vs %ld)\n", + if (unlikely(timekeeper.clock->maxadj && + (timekeeper.mult + adj > + timekeeper.clock->mult + timekeeper.clock->maxadj))) { + printk_once(KERN_WARNING + "Adjusting %s more than 11%% (%ld vs %ld)\n", timekeeper.clock->name, (long)timekeeper.mult + adj, (long)timekeeper.clock->mult + timekeeper.clock->maxadj); + } /* * So the following can be confusing. * @@ -952,7 +949,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; u64 raw_nsecs; - /* If the offset is smaller then a shifted interval, do nothing */ + /* If the offset is smaller than a shifted interval, do nothing */ if (offset < timekeeper.cycle_interval<<shift) return offset; @@ -962,9 +959,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; while (timekeeper.xtime_nsec >= nsecps) { + int leap; timekeeper.xtime_nsec -= nsecps; timekeeper.xtime.tv_sec++; - second_overflow(); + leap = second_overflow(timekeeper.xtime.tv_sec); + timekeeper.xtime.tv_sec += leap; } /* Accumulate raw time */ @@ -1018,13 +1017,13 @@ static void update_wall_time(void) * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, * we calculate the largest doubling multiple of cycle_intervals - * that is smaller then the offset. We then accumulate that + * that is smaller than the offset. We then accumulate that * chunk in one go, and then try to consume the next smaller * doubled multiple. */ shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); shift = max(0, shift); - /* Bound shift to one less then what overflows tick_length */ + /* Bound shift to one less than what overflows tick_length */ maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { @@ -1072,12 +1071,14 @@ static void update_wall_time(void) /* * Finally, make sure that after the rounding - * xtime.tv_nsec isn't larger then NSEC_PER_SEC + * xtime.tv_nsec isn't larger than NSEC_PER_SEC */ if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { + int leap; timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; timekeeper.xtime.tv_sec++; - second_overflow(); + leap = second_overflow(timekeeper.xtime.tv_sec); + timekeeper.xtime.tv_sec += leap; } timekeeping_update(false); |