summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2017-11-13 15:36:33 +0000
committerDavid Howells <dhowells@redhat.com>2017-11-13 15:36:33 +0000
commit81445e63e67a1e98b1c2575fa2b406d4289d2754 (patch)
tree2d121d1873b3e8ec7b71086c4db2f5cb207a46ec /kernel
parentede372dcae15d3ba1d3d52ac45be7a27ce31be28 (diff)
parentb24591e2fcf852ad7ad2ccf745c8220bf378d312 (diff)
downloadlinux-81445e63e67a1e98b1c2575fa2b406d4289d2754.tar.bz2
Merge remote-tracking branch 'tip/timers/core' into afs-next
These AFS patches need the timer_reduce() patch from timers/core. Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/irq/spurious.c2
-rw-r--r--kernel/kthread.c10
-rw-r--r--kernel/rcu/rcutorture.c4
-rw-r--r--kernel/rcu/tree_plugin.h9
-rw-r--r--kernel/time/Kconfig2
-rw-r--r--kernel/time/clockevents.c21
-rw-r--r--kernel/time/ntp.c227
-rw-r--r--kernel/time/ntp_internal.h1
-rw-r--r--kernel/time/posix-stubs.c20
-rw-r--r--kernel/time/tick-oneshot.c1
-rw-r--r--kernel/time/time.c71
-rw-r--r--kernel/time/timekeeping.c182
-rw-r--r--kernel/time/timekeeping.h2
-rw-r--r--kernel/time/timer.c82
-rw-r--r--kernel/workqueue.c29
15 files changed, 396 insertions, 267 deletions
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 987d7bca4864..1215229d1c12 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -21,7 +21,7 @@ static int irqfixup __read_mostly;
#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(unsigned long dummy);
-static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
+static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
static int irq_poll_cpu;
static atomic_t irq_poll_active;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1c19edf82427..ba3992c8c375 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -798,15 +798,14 @@ EXPORT_SYMBOL_GPL(kthread_queue_work);
/**
* kthread_delayed_work_timer_fn - callback that queues the associated kthread
* delayed work when the timer expires.
- * @__data: pointer to the data associated with the timer
+ * @t: pointer to the expired timer
*
* The format of the function is defined by struct timer_list.
* It should have been called from irqsafe timer with irq already off.
*/
-void kthread_delayed_work_timer_fn(unsigned long __data)
+void kthread_delayed_work_timer_fn(struct timer_list *t)
{
- struct kthread_delayed_work *dwork =
- (struct kthread_delayed_work *)__data;
+ struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
struct kthread_work *work = &dwork->work;
struct kthread_worker *worker = work->worker;
@@ -837,8 +836,7 @@ void __kthread_queue_delayed_work(struct kthread_worker *worker,
struct timer_list *timer = &dwork->timer;
struct kthread_work *work = &dwork->work;
- WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn ||
- timer->data != (unsigned long)dwork);
+ WARN_ON_ONCE(timer->function != (TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn);
/*
* If @delay is 0, queue @dwork->work immediately. This is for
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 45f2ffbc1e78..96a3cdaeed91 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1076,7 +1076,7 @@ static void rcu_torture_timer_cb(struct rcu_head *rhp)
* counter in the element should never be greater than 1, otherwise, the
* RCU implementation is broken.
*/
-static void rcu_torture_timer(unsigned long unused)
+static void rcu_torture_timer(struct timer_list *unused)
{
int idx;
unsigned long started;
@@ -1163,7 +1163,7 @@ rcu_torture_reader(void *arg)
VERBOSE_TOROUT_STRING("rcu_torture_reader task started");
set_user_nice(current, MAX_NICE);
if (irqreader && cur_ops->irq_capable)
- setup_timer_on_stack(&t, rcu_torture_timer, 0);
+ timer_setup_on_stack(&t, rcu_torture_timer, 0);
do {
if (irqreader && cur_ops->irq_capable) {
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e012b9be777e..e85946d9843b 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2261,9 +2261,11 @@ static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
}
/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
-static void do_nocb_deferred_wakeup_timer(unsigned long x)
+static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
{
- do_nocb_deferred_wakeup_common((struct rcu_data *)x);
+ struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);
+
+ do_nocb_deferred_wakeup_common(rdp);
}
/*
@@ -2327,8 +2329,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
init_swait_queue_head(&rdp->nocb_wq);
rdp->nocb_follower_tail = &rdp->nocb_follower_head;
raw_spin_lock_init(&rdp->nocb_lock);
- setup_timer(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer,
- (unsigned long)rdp);
+ timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
}
/*
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index ac09bc29eb08..d689a9557e17 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -56,7 +56,7 @@ menu "Timers subsystem"
# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is
# only related to the tick functionality. Oneshot clockevent devices
-# are supported independ of this.
+# are supported independent of this.
config TICK_ONESHOT
bool
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 4237e0744e26..16c027e9cc73 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -280,17 +280,22 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
static int clockevents_program_min_delta(struct clock_event_device *dev)
{
unsigned long long clc;
- int64_t delta;
+ int64_t delta = 0;
+ int i;
- delta = dev->min_delta_ns;
- dev->next_event = ktime_add_ns(ktime_get(), delta);
+ for (i = 0; i < 10; i++) {
+ delta += dev->min_delta_ns;
+ dev->next_event = ktime_add_ns(ktime_get(), delta);
- if (clockevent_state_shutdown(dev))
- return 0;
+ if (clockevent_state_shutdown(dev))
+ return 0;
- dev->retries++;
- clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
- return dev->set_next_event((unsigned long) clc, dev);
+ dev->retries++;
+ clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
+ if (dev->set_next_event((unsigned long) clc, dev) == 0)
+ return 0;
+ }
+ return -ETIME;
}
#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 99e03bec68e4..8d70da1b9a0d 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -493,6 +493,67 @@ out:
return leap;
}
+static void sync_hw_clock(struct work_struct *work);
+static DECLARE_DELAYED_WORK(sync_work, sync_hw_clock);
+
+static void sched_sync_hw_clock(struct timespec64 now,
+ unsigned long target_nsec, bool fail)
+
+{
+ struct timespec64 next;
+
+ getnstimeofday64(&next);
+ if (!fail)
+ next.tv_sec = 659;
+ else {
+ /*
+ * Try again as soon as possible. Delaying long periods
+ * decreases the accuracy of the work queue timer. Due to this
+ * the algorithm is very likely to require a short-sleep retry
+ * after the above long sleep to synchronize ts_nsec.
+ */
+ next.tv_sec = 0;
+ }
+
+ /* Compute the needed delay that will get to tv_nsec == target_nsec */
+ next.tv_nsec = target_nsec - next.tv_nsec;
+ if (next.tv_nsec <= 0)
+ next.tv_nsec += NSEC_PER_SEC;
+ if (next.tv_nsec >= NSEC_PER_SEC) {
+ next.tv_sec++;
+ next.tv_nsec -= NSEC_PER_SEC;
+ }
+
+ queue_delayed_work(system_power_efficient_wq, &sync_work,
+ timespec64_to_jiffies(&next));
+}
+
+static void sync_rtc_clock(void)
+{
+ unsigned long target_nsec;
+ struct timespec64 adjust, now;
+ int rc;
+
+ if (!IS_ENABLED(CONFIG_RTC_SYSTOHC))
+ return;
+
+ getnstimeofday64(&now);
+
+ adjust = now;
+ if (persistent_clock_is_local)
+ adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
+
+ /*
+ * The current RTC in use will provide the target_nsec it wants to be
+ * called at, and does rtc_tv_nsec_ok internally.
+ */
+ rc = rtc_set_ntp_time(adjust, &target_nsec);
+ if (rc == -ENODEV)
+ return;
+
+ sched_sync_hw_clock(now, target_nsec, rc);
+}
+
#ifdef CONFIG_GENERIC_CMOS_UPDATE
int __weak update_persistent_clock(struct timespec now)
{
@@ -508,76 +569,75 @@ int __weak update_persistent_clock64(struct timespec64 now64)
}
#endif
-#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
-static void sync_cmos_clock(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
-
-static void sync_cmos_clock(struct work_struct *work)
+static bool sync_cmos_clock(void)
{
+ static bool no_cmos;
struct timespec64 now;
- struct timespec64 next;
- int fail = 1;
+ struct timespec64 adjust;
+ int rc = -EPROTO;
+ long target_nsec = NSEC_PER_SEC / 2;
+
+ if (!IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE))
+ return false;
+
+ if (no_cmos)
+ return false;
/*
- * If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
- * called as close as possible to 500 ms before the new second starts.
- * This code is run on a timer. If the clock is set, that timer
- * may not expire at the correct time. Thus, we adjust...
- * We want the clock to be within a couple of ticks from the target.
+ * Historically update_persistent_clock64() has followed x86
+ * semantics, which match the MC146818A/etc RTC. This RTC will store
+ * 'adjust' and then in .5s it will advance once second.
+ *
+ * Architectures are strongly encouraged to use rtclib and not
+ * implement this legacy API.
*/
- if (!ntp_synced()) {
- /*
- * Not synced, exit, do not restart a timer (if one is
- * running, let it run out).
- */
- return;
- }
-
getnstimeofday64(&now);
- if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) {
- struct timespec64 adjust = now;
-
- fail = -ENODEV;
+ if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) {
if (persistent_clock_is_local)
adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
-#ifdef CONFIG_GENERIC_CMOS_UPDATE
- fail = update_persistent_clock64(adjust);
-#endif
-
-#ifdef CONFIG_RTC_SYSTOHC
- if (fail == -ENODEV)
- fail = rtc_set_ntp_time(adjust);
-#endif
+ rc = update_persistent_clock64(adjust);
+ /*
+ * The machine does not support update_persistent_clock64 even
+ * though it defines CONFIG_GENERIC_CMOS_UPDATE.
+ */
+ if (rc == -ENODEV) {
+ no_cmos = true;
+ return false;
+ }
}
- next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
- if (next.tv_nsec <= 0)
- next.tv_nsec += NSEC_PER_SEC;
+ sched_sync_hw_clock(now, target_nsec, rc);
+ return true;
+}
- if (!fail || fail == -ENODEV)
- next.tv_sec = 659;
- else
- next.tv_sec = 0;
+/*
+ * If we have an externally synchronized Linux clock, then update RTC clock
+ * accordingly every ~11 minutes. Generally RTCs can only store second
+ * precision, but many RTCs will adjust the phase of their second tick to
+ * match the moment of update. This infrastructure arranges to call to the RTC
+ * set at the correct moment to phase synchronize the RTC second tick over
+ * with the kernel clock.
+ */
+static void sync_hw_clock(struct work_struct *work)
+{
+ if (!ntp_synced())
+ return;
- if (next.tv_nsec >= NSEC_PER_SEC) {
- next.tv_sec++;
- next.tv_nsec -= NSEC_PER_SEC;
- }
- queue_delayed_work(system_power_efficient_wq,
- &sync_cmos_work, timespec64_to_jiffies(&next));
+ if (sync_cmos_clock())
+ return;
+
+ sync_rtc_clock();
}
void ntp_notify_cmos_timer(void)
{
- queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
-}
-
-#else
-void ntp_notify_cmos_timer(void) { }
-#endif
+ if (!ntp_synced())
+ return;
+ if (IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE) ||
+ IS_ENABLED(CONFIG_RTC_SYSTOHC))
+ queue_delayed_work(system_power_efficient_wq, &sync_work, 0);
+}
/*
* Propagate a new txc->status value into the NTP state:
@@ -654,67 +714,6 @@ static inline void process_adjtimex_modes(struct timex *txc,
}
-
-/**
- * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
- */
-int ntp_validate_timex(struct timex *txc)
-{
- if (txc->modes & ADJ_ADJTIME) {
- /* singleshot must not be used with any other mode bits */
- if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
- return -EINVAL;
- if (!(txc->modes & ADJ_OFFSET_READONLY) &&
- !capable(CAP_SYS_TIME))
- return -EPERM;
- } else {
- /* In order to modify anything, you gotta be super-user! */
- if (txc->modes && !capable(CAP_SYS_TIME))
- return -EPERM;
- /*
- * if the quartz is off by more than 10% then
- * something is VERY wrong!
- */
- if (txc->modes & ADJ_TICK &&
- (txc->tick < 900000/USER_HZ ||
- txc->tick > 1100000/USER_HZ))
- return -EINVAL;
- }
-
- if (txc->modes & ADJ_SETOFFSET) {
- /* In order to inject time, you gotta be super-user! */
- if (!capable(CAP_SYS_TIME))
- return -EPERM;
-
- if (txc->modes & ADJ_NANO) {
- struct timespec ts;
-
- ts.tv_sec = txc->time.tv_sec;
- ts.tv_nsec = txc->time.tv_usec;
- if (!timespec_inject_offset_valid(&ts))
- return -EINVAL;
-
- } else {
- if (!timeval_inject_offset_valid(&txc->time))
- return -EINVAL;
- }
- }
-
- /*
- * Check for potential multiplication overflows that can
- * only happen on 64-bit systems:
- */
- if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
- if (LLONG_MIN / PPM_SCALE > txc->freq)
- return -EINVAL;
- if (LLONG_MAX / PPM_SCALE < txc->freq)
- return -EINVAL;
- }
-
- return 0;
-}
-
-
/*
* adjtimex mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 0a53e6ea47b1..909bd1f1bfb1 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -8,7 +8,6 @@ extern void ntp_clear(void);
extern u64 ntp_tick_length(void);
extern ktime_t ntp_get_next_leap(void);
extern int second_overflow(time64_t secs);
-extern int ntp_validate_timex(struct timex *);
extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
#endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 06f34feb635e..b258bee13b02 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -117,8 +117,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
const struct timespec __user *, rqtp,
struct timespec __user *, rmtp)
{
- struct timespec64 t64;
- struct timespec t;
+ struct timespec64 t;
switch (which_clock) {
case CLOCK_REALTIME:
@@ -129,16 +128,15 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
return -EINVAL;
}
- if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
+ if (get_timespec64(&t, rqtp))
return -EFAULT;
- t64 = timespec_to_timespec64(t);
- if (!timespec64_valid(&t64))
+ if (!timespec64_valid(&t))
return -EINVAL;
if (flags & TIMER_ABSTIME)
rmtp = NULL;
current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
current->restart_block.nanosleep.rmtp = rmtp;
- return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ?
+ return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
@@ -203,8 +201,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
struct compat_timespec __user *, rqtp,
struct compat_timespec __user *, rmtp)
{
- struct timespec64 t64;
- struct timespec t;
+ struct timespec64 t;
switch (which_clock) {
case CLOCK_REALTIME:
@@ -215,16 +212,15 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
return -EINVAL;
}
- if (compat_get_timespec(&t, rqtp))
+ if (compat_get_timespec64(&t, rqtp))
return -EFAULT;
- t64 = timespec_to_timespec64(t);
- if (!timespec64_valid(&t64))
+ if (!timespec64_valid(&t))
return -EINVAL;
if (flags & TIMER_ABSTIME)
rmtp = NULL;
current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
current->restart_block.nanosleep.compat_rmtp = rmtp;
- return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ?
+ return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 6b009c207671..c1f518e7aa80 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -33,6 +33,7 @@ int tick_program_event(ktime_t expires, int force)
* We don't need the clock event device any more, stop it.
*/
clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
+ dev->next_event = KTIME_MAX;
return 0;
}
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 44a8c1402133..bd4e6c7dd689 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -82,7 +82,7 @@ SYSCALL_DEFINE1(time, time_t __user *, tloc)
SYSCALL_DEFINE1(stime, time_t __user *, tptr)
{
- struct timespec tv;
+ struct timespec64 tv;
int err;
if (get_user(tv.tv_sec, tptr))
@@ -90,11 +90,11 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr)
tv.tv_nsec = 0;
- err = security_settime(&tv, NULL);
+ err = security_settime64(&tv, NULL);
if (err)
return err;
- do_settimeofday(&tv);
+ do_settimeofday64(&tv);
return 0;
}
@@ -122,7 +122,7 @@ COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
{
- struct timespec tv;
+ struct timespec64 tv;
int err;
if (get_user(tv.tv_sec, tptr))
@@ -130,11 +130,11 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
tv.tv_nsec = 0;
- err = security_settime(&tv, NULL);
+ err = security_settime64(&tv, NULL);
if (err)
return err;
- do_settimeofday(&tv);
+ do_settimeofday64(&tv);
return 0;
}
@@ -158,40 +158,6 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
}
/*
- * Indicates if there is an offset between the system clock and the hardware
- * clock/persistent clock/rtc.
- */
-int persistent_clock_is_local;
-
-/*
- * Adjust the time obtained from the CMOS to be UTC time instead of
- * local time.
- *
- * This is ugly, but preferable to the alternatives. Otherwise we
- * would either need to write a program to do it in /etc/rc (and risk
- * confusion if the program gets run more than once; it would also be
- * hard to make the program warp the clock precisely n hours) or
- * compile in the timezone information into the kernel. Bad, bad....
- *
- * - TYT, 1992-01-01
- *
- * The best thing to do is to keep the CMOS clock in universal time (UTC)
- * as real UNIX machines always do it. This avoids all headaches about
- * daylight saving times and warping kernel clocks.
- */
-static inline void warp_clock(void)
-{
- if (sys_tz.tz_minuteswest != 0) {
- struct timespec adjust;
-
- persistent_clock_is_local = 1;
- adjust.tv_sec = sys_tz.tz_minuteswest * 60;
- adjust.tv_nsec = 0;
- timekeeping_inject_offset(&adjust);
- }
-}
-
-/*
* In case for some reason the CMOS clock has not already been running
* in UTC, but in some local time: The first time we set the timezone,
* we will warp the clock so that it is ticking UTC time instead of
@@ -224,7 +190,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz
if (firsttime) {
firsttime = 0;
if (!tv)
- warp_clock();
+ timekeeping_warp_clock();
}
}
if (tv)
@@ -441,6 +407,7 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0,
}
EXPORT_SYMBOL(mktime64);
+#if __BITS_PER_LONG == 32
/**
* set_normalized_timespec - set timespec sec and nsec parts and normalize
*
@@ -501,6 +468,7 @@ struct timespec ns_to_timespec(const s64 nsec)
return ts;
}
EXPORT_SYMBOL(ns_to_timespec);
+#endif
/**
* ns_to_timeval - Convert nanoseconds to timeval
@@ -520,7 +488,6 @@ struct timeval ns_to_timeval(const s64 nsec)
}
EXPORT_SYMBOL(ns_to_timeval);
-#if BITS_PER_LONG == 32
/**
* set_normalized_timespec - set timespec sec and nsec parts and normalize
*
@@ -581,7 +548,7 @@ struct timespec64 ns_to_timespec64(const s64 nsec)
return ts;
}
EXPORT_SYMBOL(ns_to_timespec64);
-#endif
+
/**
* msecs_to_jiffies: - convert milliseconds to jiffies
* @m: time in milliseconds
@@ -853,24 +820,6 @@ unsigned long nsecs_to_jiffies(u64 n)
EXPORT_SYMBOL_GPL(nsecs_to_jiffies);
/*
- * Add two timespec values and do a safety check for overflow.
- * It's assumed that both values are valid (>= 0)
- */
-struct timespec timespec_add_safe(const struct timespec lhs,
- const struct timespec rhs)
-{
- struct timespec res;
-
- set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
- lhs.tv_nsec + rhs.tv_nsec);
-
- if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
- res.tv_sec = TIME_T_MAX;
-
- return res;
-}
-
-/*
* Add two timespec64 values and do a safety check for overflow.
* It's assumed that both values are valid (>= 0).
* And, each timespec64 is in normalized form.
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2cafb49aa65e..198afa78bf69 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -60,8 +60,27 @@ struct tk_fast {
struct tk_read_base base[2];
};
-static struct tk_fast tk_fast_mono ____cacheline_aligned;
-static struct tk_fast tk_fast_raw ____cacheline_aligned;
+/* Suspend-time cycles value for halted fast timekeeper. */
+static u64 cycles_at_suspend;
+
+static u64 dummy_clock_read(struct clocksource *cs)
+{
+ return cycles_at_suspend;
+}
+
+static struct clocksource dummy_clock = {
+ .read = dummy_clock_read,
+};
+
+static struct tk_fast tk_fast_mono ____cacheline_aligned = {
+ .base[0] = { .clock = &dummy_clock, },
+ .base[1] = { .clock = &dummy_clock, },
+};
+
+static struct tk_fast tk_fast_raw ____cacheline_aligned = {
+ .base[0] = { .clock = &dummy_clock, },
+ .base[1] = { .clock = &dummy_clock, },
+};
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
@@ -477,17 +496,39 @@ u64 notrace ktime_get_boot_fast_ns(void)
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
-/* Suspend-time cycles value for halted fast timekeeper. */
-static u64 cycles_at_suspend;
-static u64 dummy_clock_read(struct clocksource *cs)
+/*
+ * See comment for __ktime_get_fast_ns() vs. timestamp ordering
+ */
+static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
{
- return cycles_at_suspend;
+ struct tk_read_base *tkr;
+ unsigned int seq;
+ u64 now;
+
+ do {
+ seq = raw_read_seqcount_latch(&tkf->seq);
+ tkr = tkf->base + (seq & 0x01);
+ now = ktime_to_ns(tkr->base_real);
+
+ now += timekeeping_delta_to_ns(tkr,
+ clocksource_delta(
+ tk_clock_read(tkr),
+ tkr->cycle_last,
+ tkr->mask));
+ } while (read_seqcount_retry(&tkf->seq, seq));
+
+ return now;
}
-static struct clocksource dummy_clock = {
- .read = dummy_clock_read,
-};
+/**
+ * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
+ */
+u64 ktime_get_real_fast_ns(void)
+{
+ return __ktime_get_real_fast_ns(&tk_fast_mono);
+}
+EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
/**
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
@@ -507,6 +548,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
cycles_at_suspend = tk_clock_read(tkr);
tkr_dummy.clock = &dummy_clock;
+ tkr_dummy.base_real = tkr->base + tk->offs_real;
update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
tkr = &tk->tkr_raw;
@@ -654,6 +696,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
update_vsyscall(tk);
update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
+ tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
@@ -1264,33 +1307,31 @@ EXPORT_SYMBOL(do_settimeofday64);
*
* Adds or subtracts an offset value from the current time.
*/
-int timekeeping_inject_offset(struct timespec *ts)
+static int timekeeping_inject_offset(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned long flags;
- struct timespec64 ts64, tmp;
+ struct timespec64 tmp;
int ret = 0;
- if (!timespec_inject_offset_valid(ts))
+ if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
- ts64 = timespec_to_timespec64(*ts);
-
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
timekeeping_forward_now(tk);
/* Make sure the proposed value is valid */
- tmp = timespec64_add(tk_xtime(tk), ts64);
- if (timespec64_compare(&tk->wall_to_monotonic, &ts64) > 0 ||
+ tmp = timespec64_add(tk_xtime(tk), *ts);
+ if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
!timespec64_valid_strict(&tmp)) {
ret = -EINVAL;
goto error;
}
- tk_xtime_add(tk, &ts64);
- tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts64));
+ tk_xtime_add(tk, ts);
+ tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts));
error: /* even if we error out, we forwarded the time, so call update */
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1303,7 +1344,40 @@ error: /* even if we error out, we forwarded the time, so call update */
return ret;
}
-EXPORT_SYMBOL(timekeeping_inject_offset);
+
+/*
+ * Indicates if there is an offset between the system clock and the hardware
+ * clock/persistent clock/rtc.
+ */
+int persistent_clock_is_local;
+
+/*
+ * Adjust the time obtained from the CMOS to be UTC time instead of
+ * local time.
+ *
+ * This is ugly, but preferable to the alternatives. Otherwise we
+ * would either need to write a program to do it in /etc/rc (and risk
+ * confusion if the program gets run more than once; it would also be
+ * hard to make the program warp the clock precisely n hours) or
+ * compile in the timezone information into the kernel. Bad, bad....
+ *
+ * - TYT, 1992-01-01
+ *
+ * The best thing to do is to keep the CMOS clock in universal time (UTC)
+ * as real UNIX machines always do it. This avoids all headaches about
+ * daylight saving times and warping kernel clocks.
+ */
+void timekeeping_warp_clock(void)
+{
+ if (sys_tz.tz_minuteswest != 0) {
+ struct timespec64 adjust;
+
+ persistent_clock_is_local = 1;
+ adjust.tv_sec = sys_tz.tz_minuteswest * 60;
+ adjust.tv_nsec = 0;
+ timekeeping_inject_offset(&adjust);
+ }
+}
/**
* __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
@@ -2248,6 +2322,72 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
}
/**
+ * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
+ */
+static int timekeeping_validate_timex(struct timex *txc)
+{
+ if (txc->modes & ADJ_ADJTIME) {
+ /* singleshot must not be used with any other mode bits */
+ if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
+ return -EINVAL;
+ if (!(txc->modes & ADJ_OFFSET_READONLY) &&
+ !capable(CAP_SYS_TIME))
+ return -EPERM;
+ } else {
+ /* In order to modify anything, you gotta be super-user! */
+ if (txc->modes && !capable(CAP_SYS_TIME))
+ return -EPERM;
+ /*
+ * if the quartz is off by more than 10% then
+ * something is VERY wrong!
+ */
+ if (txc->modes & ADJ_TICK &&
+ (txc->tick < 900000/USER_HZ ||
+ txc->tick > 1100000/USER_HZ))
+ return -EINVAL;
+ }
+
+ if (txc->modes & ADJ_SETOFFSET) {
+ /* In order to inject time, you gotta be super-user! */
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ /*
+ * Validate if a timespec/timeval used to inject a time
+ * offset is valid. Offsets can be postive or negative, so
+ * we don't check tv_sec. The value of the timeval/timespec
+ * is the sum of its fields,but *NOTE*:
+ * The field tv_usec/tv_nsec must always be non-negative and
+ * we can't have more nanoseconds/microseconds than a second.
+ */
+ if (txc->time.tv_usec < 0)
+ return -EINVAL;
+
+ if (txc->modes & ADJ_NANO) {
+ if (txc->time.tv_usec >= NSEC_PER_SEC)
+ return -EINVAL;
+ } else {
+ if (txc->time.tv_usec >= USEC_PER_SEC)
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * Check for potential multiplication overflows that can
+ * only happen on 64-bit systems:
+ */
+ if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
+ if (LLONG_MIN / PPM_SCALE > txc->freq)
+ return -EINVAL;
+ if (LLONG_MAX / PPM_SCALE < txc->freq)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+/**
* do_adjtimex() - Accessor function to NTP __do_adjtimex function
*/
int do_adjtimex(struct timex *txc)
@@ -2259,12 +2399,12 @@ int do_adjtimex(struct timex *txc)
int ret;
/* Validate the data before disabling interrupts */
- ret = ntp_validate_timex(txc);
+ ret = timekeeping_validate_timex(txc);
if (ret)
return ret;
if (txc->modes & ADJ_SETOFFSET) {
- struct timespec delta;
+ struct timespec64 delta;
delta.tv_sec = txc->time.tv_sec;
delta.tv_nsec = txc->time.tv_usec;
if (!(txc->modes & ADJ_NANO))
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index c9f9af339914..7a9b4eb7a1d5 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -11,7 +11,7 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void);
-extern int timekeeping_inject_offset(struct timespec *ts);
+extern void timekeeping_warp_clock(void);
extern int timekeeping_suspend(void);
extern void timekeeping_resume(void);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index f2674a056c26..af0b8bae4502 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -610,7 +610,7 @@ static bool timer_fixup_init(void *addr, enum debug_obj_state state)
}
/* Stub timer callback for improperly used timers. */
-static void stub_timer(unsigned long data)
+static void stub_timer(struct timer_list *unused)
{
WARN_ON(1);
}
@@ -626,7 +626,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
- setup_timer(timer, stub_timer, 0);
+ timer_setup(timer, stub_timer, 0);
return true;
case ODEBUG_STATE_ACTIVE:
@@ -665,7 +665,7 @@ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
- setup_timer(timer, stub_timer, 0);
+ timer_setup(timer, stub_timer, 0);
return true;
default:
return false;
@@ -929,8 +929,11 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
}
}
+#define MOD_TIMER_PENDING_ONLY 0x01
+#define MOD_TIMER_REDUCE 0x02
+
static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
{
struct timer_base *base, *new_base;
unsigned int idx = UINT_MAX;
@@ -950,7 +953,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* larger granularity than you would get from adding a new
* timer with this expiry.
*/
- if (timer->expires == expires)
+ long diff = timer->expires - expires;
+
+ if (!diff)
+ return 1;
+ if (options & MOD_TIMER_REDUCE && diff <= 0)
return 1;
/*
@@ -962,6 +969,12 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
base = lock_timer_base(timer, &flags);
forward_timer_base(base);
+ if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
+ time_before_eq(timer->expires, expires)) {
+ ret = 1;
+ goto out_unlock;
+ }
+
clk = base->clk;
idx = calc_wheel_index(expires, clk);
@@ -971,7 +984,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* subsequent call will exit in the expires check above.
*/
if (idx == timer_get_idx(timer)) {
- timer->expires = expires;
+ if (!(options & MOD_TIMER_REDUCE))
+ timer->expires = expires;
+ else if (time_after(timer->expires, expires))
+ timer->expires = expires;
ret = 1;
goto out_unlock;
}
@@ -981,7 +997,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
}
ret = detach_if_pending(timer, base, false);
- if (!ret && pending_only)
+ if (!ret && (options & MOD_TIMER_PENDING_ONLY))
goto out_unlock;
debug_activate(timer, expires);
@@ -1042,7 +1058,7 @@ out_unlock:
*/
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
- return __mod_timer(timer, expires, true);
+ return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY);
}
EXPORT_SYMBOL(mod_timer_pending);
@@ -1068,11 +1084,26 @@ EXPORT_SYMBOL(mod_timer_pending);
*/
int mod_timer(struct timer_list *timer, unsigned long expires)
{
- return __mod_timer(timer, expires, false);
+ return __mod_timer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_timer);
/**
+ * timer_reduce - Modify a timer's timeout if it would reduce the timeout
+ * @timer: The timer to be modified
+ * @expires: New timeout in jiffies
+ *
+ * timer_reduce() is very similar to mod_timer(), except that it will only
+ * modify a running timer if that would reduce the expiration time (it will
+ * start a timer that isn't running).
+ */
+int timer_reduce(struct timer_list *timer, unsigned long expires)
+{
+ return __mod_timer(timer, expires, MOD_TIMER_REDUCE);
+}
+EXPORT_SYMBOL(timer_reduce);
+
+/**
* add_timer - start a timer
* @timer: the timer to be added
*
@@ -1560,8 +1591,11 @@ static int collect_expired_timers(struct timer_base *base,
* jiffies, otherwise forward to the next expiry time:
*/
if (time_after(next, jiffies)) {
- /* The call site will increment clock! */
- base->clk = jiffies - 1;
+ /*
+ * The call site will increment base->clk and then
+ * terminate the expiry loop immediately.
+ */
+ base->clk = jiffies;
return 0;
}
base->clk = next;
@@ -1668,9 +1702,20 @@ void run_local_timers(void)
raise_softirq(TIMER_SOFTIRQ);
}
-static void process_timeout(unsigned long __data)
+/*
+ * Since schedule_timeout()'s timer is defined on the stack, it must store
+ * the target task on the stack as well.
+ */
+struct process_timer {
+ struct timer_list timer;
+ struct task_struct *task;
+};
+
+static void process_timeout(struct timer_list *t)
{
- wake_up_process((struct task_struct *)__data);
+ struct process_timer *timeout = from_timer(timeout, t, timer);
+
+ wake_up_process(timeout->task);
}
/**
@@ -1704,7 +1749,7 @@ static void process_timeout(unsigned long __data)
*/
signed long __sched schedule_timeout(signed long timeout)
{
- struct timer_list timer;
+ struct process_timer timer;
unsigned long expire;
switch (timeout)
@@ -1738,13 +1783,14 @@ signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies;
- setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
- __mod_timer(&timer, expire, false);
+ timer.task = current;
+ timer_setup_on_stack(&timer.timer, process_timeout, 0);
+ __mod_timer(&timer.timer, expire, 0);
schedule();
- del_singleshot_timer_sync(&timer);
+ del_singleshot_timer_sync(&timer.timer);
/* Remove the timer from the object tracker */
- destroy_timer_on_stack(&timer);
+ destroy_timer_on_stack(&timer.timer);
timeout = expire - jiffies;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a2dccfe1acec..3b67c0a0df16 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1493,9 +1493,9 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL(queue_work_on);
-void delayed_work_timer_fn(unsigned long __data)
+void delayed_work_timer_fn(struct timer_list *t)
{
- struct delayed_work *dwork = (struct delayed_work *)__data;
+ struct delayed_work *dwork = from_timer(dwork, t, timer);
/* should have been called from irqsafe timer with irq already off */
__queue_work(dwork->cpu, dwork->wq, &dwork->work);
@@ -1509,8 +1509,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
struct work_struct *work = &dwork->work;
WARN_ON_ONCE(!wq);
- WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
- timer->data != (unsigned long)dwork);
+ WARN_ON_ONCE(timer->function != (TIMER_FUNC_TYPE)delayed_work_timer_fn);
WARN_ON_ONCE(timer_pending(timer));
WARN_ON_ONCE(!list_empty(&work->entry));
@@ -1833,9 +1832,9 @@ static void destroy_worker(struct worker *worker)
wake_up_process(worker->task);
}
-static void idle_worker_timeout(unsigned long __pool)
+static void idle_worker_timeout(struct timer_list *t)
{
- struct worker_pool *pool = (void *)__pool;
+ struct worker_pool *pool = from_timer(pool, t, idle_timer);
spin_lock_irq(&pool->lock);
@@ -1881,9 +1880,9 @@ static void send_mayday(struct work_struct *work)
}
}
-static void pool_mayday_timeout(unsigned long __pool)
+static void pool_mayday_timeout(struct timer_list *t)
{
- struct worker_pool *pool = (void *)__pool;
+ struct worker_pool *pool = from_timer(pool, t, mayday_timer);
struct work_struct *work;
spin_lock_irq(&pool->lock);
@@ -3236,11 +3235,9 @@ static int init_worker_pool(struct worker_pool *pool)
INIT_LIST_HEAD(&pool->idle_list);
hash_init(pool->busy_hash);
- setup_deferrable_timer(&pool->idle_timer, idle_worker_timeout,
- (unsigned long)pool);
+ timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
- setup_timer(&pool->mayday_timer, pool_mayday_timeout,
- (unsigned long)pool);
+ timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
mutex_init(&pool->attach_mutex);
INIT_LIST_HEAD(&pool->workers);
@@ -5383,11 +5380,8 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
*/
#ifdef CONFIG_WQ_WATCHDOG
-static void wq_watchdog_timer_fn(unsigned long data);
-
static unsigned long wq_watchdog_thresh = 30;
-static struct timer_list wq_watchdog_timer =
- TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
+static struct timer_list wq_watchdog_timer;
static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
@@ -5401,7 +5395,7 @@ static void wq_watchdog_reset_touched(void)
per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
}
-static void wq_watchdog_timer_fn(unsigned long data)
+static void wq_watchdog_timer_fn(struct timer_list *unused)
{
unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
bool lockup_detected = false;
@@ -5503,6 +5497,7 @@ module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
static void wq_watchdog_init(void)
{
+ timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
wq_watchdog_set_thresh(wq_watchdog_thresh);
}