From a0e3213f836640065e2a23ad55fa6f72e812a4f1 Mon Sep 17 00:00:00 2001 From: zhuo-hao Date: Tue, 17 Nov 2015 20:08:07 +0800 Subject: alarmtimer: Avoid unexpected rtc interrupt when system resume from S3 Before the system go to suspend (S3), if user create a timer with clockid CLOCK_REALTIME_ALARM/CLOCK_BOOTTIME_ALARM and set a "large" timeout value to this timer. The function alarmtimer_suspend will be called to setup a timeout value to RTC timer to avoid the system sleep over time. However, if the system wakeup early than RTC timeout, the RTC timer will not be cleared. And this will cause the hpet_rtc_interrupt come unexpectedly until the RTC timeout. To fix this problem, just adding alarmtimer_resume to cancel the RTC timer. This was noticed because the HPET RTC emulation fires an interrupt every 16ms(=1/2^DEFAULT_RTC_SHIFT) up to the point where the alarm time is reached. This program always hits this situation (https://lkml.org/lkml/2015/11/8/326), if system wake up earlier than alarm time. Cc: Thomas Gleixner Cc: John Stultz Signed-off-by: Zhuo-hao Lee [jstultz: Tweak commit subject & formatting slightly] Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 7fbba635a549..e840ed867a5d 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -271,11 +271,27 @@ static int alarmtimer_suspend(struct device *dev) __pm_wakeup_event(ws, MSEC_PER_SEC); return ret; } + +static int alarmtimer_resume(struct device *dev) +{ + struct rtc_device *rtc; + + rtc = alarmtimer_get_rtcdev(); + if (rtc) + rtc_timer_cancel(rtc, &rtctimer); + return 0; +} + #else static int alarmtimer_suspend(struct device *dev) { return 0; } + +static int alarmtimer_resume(struct device *dev) +{ + return 0; +} #endif static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) @@ -800,6 +816,7 @@ out: /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { .suspend = alarmtimer_suspend, + .resume = alarmtimer_resume, }; static struct platform_driver alarmtimer_driver = { -- cgit v1.2.3 From 35a4933a895927990772ae96fdcfd2f806929ee2 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 30 Nov 2015 12:30:30 +1100 Subject: time: Avoid signed overflow in timekeeping_get_ns() 1e75fa8 "time: Condense timekeeper.xtime into xtime_sec" replaced a call to clocksource_cyc2ns() from timekeeping_get_ns() with an open-coded version of the same logic to avoid keeping a semi-redundant struct timespec in struct timekeeper. However, the commit also introduced a subtle semantic change - where clocksource_cyc2ns() uses purely unsigned math, the new version introduces a signed temporary, meaning that if (delta * tk->mult) has a 63-bit overflow the following shift will still give a negative result. The choice of 'maxsec' in __clocksource_updatefreq_scale() means this will generally happen if there's a ~10 minute pause in examining the clocksource. This can be triggered on a powerpc KVM guest by stopping it from qemu for a bit over 10 minutes. After resuming time has jumped backwards several minutes causing numerous problems (jiffies does not advance, msleep()s can be extended by minutes..). It doesn't happen on x86 KVM guests, because the guest TSC is effectively frozen while the guest is stopped, which is not the case for the powerpc timebase. Obviously an unsigned (64 bit) overflow will only take twice as long as a signed, 63-bit overflow. I don't know the time code well enough to know if that will still cause incorrect calculations, or if a 64-bit overflow is avoided elsewhere. Still, an incorrect forwards clock adjustment will cause less trouble than time going backwards. So, this patch removes the potential for intermediate signed overflow. Cc: stable@vger.kernel.org (3.7+) Suggested-by: Laurent Vivier Tested-by: Laurent Vivier Signed-off-by: David Gibson Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d563c1960302..99188ee5d9d0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -305,8 +305,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) delta = timekeeping_get_delta(tkr); - nsec = delta * tkr->mult + tkr->xtime_nsec; - nsec >>= tkr->shift; + nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift; /* If arch requires, add in get_arch_timeoffset() */ return nsec + arch_gettimeoffset(); -- cgit v1.2.3 From 390dd67c471a43a8a0f36c7d5177de49e7749c59 Mon Sep 17 00:00:00 2001 From: Seiichi Ikarashi Date: Thu, 10 Sep 2015 18:01:56 +0900 Subject: clocksource: Add CPU info to clocksource watchdog reporting The clocksource watchdog reporting was improved by 0b046b217ad4c6. I want to add the info of CPU where the watchdog detects a deviation because it is necessary to identify the trouble spot if the clocksource is TSC. Signed-off-by: Seiichi Ikarashi [jstultz: Tweaked commit message] Signed-off-by: John Stultz --- kernel/time/clocksource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1347882d131e..664de539299b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -218,8 +218,8 @@ static void clocksource_watchdog(unsigned long data) /* Check the deviation from the watchdog clocksource. */ if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { - pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", - cs->name); + pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n", + smp_processor_id(), cs->name); pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", watchdog->name, wdnow, wdlast, watchdog->mask); pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", -- cgit v1.2.3 From 52d189f1b38810b1b483d5bac2e4fa90b9afd372 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 3 Dec 2015 15:46:48 -0500 Subject: ntp: Verify offset doesn't overflow in ntp_update_offset We need to make sure that the offset is valid before manipulating it, otherwise it might overflow on the multiplication. Cc: Sasha Levin Cc: Richard Cochran Cc: Thomas Gleixner Signed-off-by: Sasha Levin [jstultz: Reworked one of the checks so it makes more sense] Signed-off-by: John Stultz --- kernel/time/ntp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 149cc8086aea..125fc0342355 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -297,15 +297,17 @@ static void ntp_update_offset(long offset) if (!(time_status & STA_PLL)) return; - if (!(time_status & STA_NANO)) + if (!(time_status & STA_NANO)) { + /* Make sure the multiplication below won't overflow */ + offset = clamp(offset, -USEC_PER_SEC, USEC_PER_SEC); offset *= NSEC_PER_USEC; + } /* * Scale the phase adjustment and * clamp to the operating range. */ - offset = min(offset, MAXPHASE); - offset = max(offset, -MAXPHASE); + offset = clamp(offset, -MAXPHASE, MAXPHASE); /* * Select how the frequency is to be controlled -- cgit v1.2.3 From 37cf4dc3370fbca0344e23bb96446eb2c3548ba7 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 3 Dec 2015 22:09:31 -0500 Subject: time: Verify time values in adjtimex ADJ_SETOFFSET to avoid overflow For adjtimex()'s ADJ_SETOFFSET, make sure the tv_usec value is sane. We might multiply them later which can cause an overflow and undefined behavior. This patch introduces new helper functions to simplify the checking code and adds comments to clarify Orginally this patch was by Sasha Levin, but I've basically rewritten it, so he should get credit for finding the issue and I should get the blame for any mistakes made since. Also, credit to Richard Cochran for the phrasing used in the comment for what is considered valid here. Cc: Sasha Levin Cc: Richard Cochran Cc: Thomas Gleixner Reported-by: Sasha Levin Signed-off-by: John Stultz --- include/linux/time.h | 26 ++++++++++++++++++++++++++ kernel/time/ntp.c | 10 ++++++++-- kernel/time/timekeeping.c | 2 +- 3 files changed, 35 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/time.h b/include/linux/time.h index beebe3a02d43..297f09f23896 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -125,6 +125,32 @@ static inline bool timeval_valid(const struct timeval *tv) extern struct timespec timespec_trunc(struct timespec t, unsigned gran); +/* + * Validates if a timespec/timeval used to inject a time offset is valid. + * Offsets can be postive or negative. The value of the timeval/timespec + * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must + * always be non-negative. + */ +static inline bool timeval_inject_offset_valid(const struct timeval *tv) +{ + /* We don't check the tv_sec as it can be positive or negative */ + + /* Can't have more microseconds then a second */ + if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) + return false; + return true; +} + +static inline bool timespec_inject_offset_valid(const struct timespec *ts) +{ + /* We don't check the tv_sec as it can be positive or negative */ + + /* Can't have more nanoseconds then a second */ + if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) + return false; + return true; +} + #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 125fc0342355..4073c9550af9 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -676,8 +676,14 @@ int ntp_validate_timex(struct timex *txc) return -EINVAL; } - if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) - return -EPERM; + if (txc->modes & ADJ_SETOFFSET) { + /* In order to inject time, you gotta be super-user! */ + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + if (!timeval_inject_offset_valid(&txc->time)) + return -EINVAL; + } /* * Check for potential multiplication overflows that can diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 99188ee5d9d0..d9249daf14ba 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -958,7 +958,7 @@ int timekeeping_inject_offset(struct timespec *ts) struct timespec64 ts64, tmp; int ret = 0; - if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + if (!timespec_inject_offset_valid(ts)) return -EINVAL; ts64 = timespec_to_timespec64(*ts); -- cgit v1.2.3 From dee3665416a8553279d10b62b5e62685cbe5daa8 Mon Sep 17 00:00:00 2001 From: DengChao Date: Sun, 13 Dec 2015 12:24:18 +0800 Subject: timekeeping: Provide internal function __ktime_get_real_seconds In order to fix Y2038 issues in the ntp code we will need replace get_seconds() with ktime_get_real_seconds() but as the ntp code uses the timekeeping lock which is also used by ktime_get_real_seconds(), we need a version without locking. Add a new function __ktime_get_real_seconds() in timekeeping to do this. Reviewed-by: John Stultz Signed-off-by: DengChao Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 13 +++++++++++++ kernel/time/timekeeping_internal.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d9249daf14ba..21cc23918cbd 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -845,6 +845,19 @@ time64_t ktime_get_real_seconds(void) } EXPORT_SYMBOL_GPL(ktime_get_real_seconds); +/** + * __ktime_get_real_seconds - The same as ktime_get_real_seconds + * but without the sequence counter protect. This internal function + * is called just when timekeeping lock is already held. + */ +time64_t __ktime_get_real_seconds(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + return tk->xtime_sec; +} + + #ifdef CONFIG_NTP_PPS /** diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 4ea005a7f9da..e20466ffc208 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -26,4 +26,6 @@ static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask) } #endif +extern time64_t __ktime_get_real_seconds(void); + #endif /* _TIMEKEEPING_INTERNAL_H */ -- cgit v1.2.3 From 0af864651b459afb0435ee8786a19cbe5a044cdb Mon Sep 17 00:00:00 2001 From: DengChao Date: Sun, 13 Dec 2015 12:24:19 +0800 Subject: ntp: Change time_reftime to time64_t and utilize 64bit __ktime_get_real_seconds The type of static variant "time_reftime" and the call of get_seconds in ntp are both not y2038 safe. So change the type of time_reftime to time64_t and replace get_seconds with __ktime_get_real_seconds. The local variant "secs" in ntp_update_offset represents seconds between now and last ntp adjustment, it seems impossible that this time will last more than 68 years, so keep its type as "long". Reviewed-by: John Stultz Signed-off-by: DengChao [jstultz: Tweaked commit message] Signed-off-by: John Stultz --- kernel/time/ntp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4073c9550af9..e947bfddd2c2 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -18,6 +18,8 @@ #include #include "ntp_internal.h" +#include "timekeeping_internal.h" + /* * NTP timekeeping variables: @@ -70,7 +72,7 @@ static long time_esterror = NTP_PHASE_LIMIT; static s64 time_freq; /* time at last adjustment (secs): */ -static long time_reftime; +static time64_t time_reftime; static long time_adjust; @@ -313,11 +315,11 @@ static void ntp_update_offset(long offset) * Select how the frequency is to be controlled * and in which mode (PLL or FLL). */ - secs = get_seconds() - time_reftime; + secs = (long)(__ktime_get_real_seconds() - time_reftime); if (unlikely(time_status & STA_FREQHOLD)) secs = 0; - time_reftime = get_seconds(); + time_reftime = __ktime_get_real_seconds(); offset64 = offset; freq_adj = ntp_update_offset_fll(offset64, secs); @@ -592,7 +594,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec64 *ts) * reference time to current time. */ if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) - time_reftime = get_seconds(); + time_reftime = __ktime_get_real_seconds(); /* only set allowed bits */ time_status &= STA_RONLY; -- cgit v1.2.3 From c796348774f15c6e682834ed288bcae0f2c95707 Mon Sep 17 00:00:00 2001 From: DengChao Date: Sun, 13 Dec 2015 12:26:42 +0800 Subject: ntp: Fix second_overflow's input parameter type to be 64bits The function "second_overflow" uses "unsign long" as its input parameter type which will overflow after year 2106 on 32bit systems. Thus this patch replaces it with time64_t type. While the 64-bit division is expensive, "next_ntp_leap_sec" has been calculated already, so we can just re-use it in the TIME_INS/DEL cases, allowing one expensive division per leapsecond instead of re-doing the divsion once a second after the leap flag has been set. Signed-off-by: DengChao [jstultz: Tweaked commit message] Signed-off-by: John Stultz --- kernel/time/ntp.c | 16 +++++++++------- kernel/time/ntp_internal.h | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index e947bfddd2c2..36f2ca09aa5e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "ntp_internal.h" #include "timekeeping_internal.h" @@ -394,10 +395,11 @@ ktime_t ntp_get_next_leap(void) * * Also handles leap second processing, and returns leap offset */ -int second_overflow(unsigned long secs) +int second_overflow(time64_t secs) { s64 delta; int leap = 0; + s32 rem; /* * Leap second processing. If in leap-insert state at the end of the @@ -408,19 +410,19 @@ int second_overflow(unsigned long secs) case TIME_OK: if (time_status & STA_INS) { time_state = TIME_INS; - ntp_next_leap_sec = secs + SECS_PER_DAY - - (secs % SECS_PER_DAY); + div_s64_rem(secs, SECS_PER_DAY, &rem); + ntp_next_leap_sec = secs + SECS_PER_DAY - rem; } else if (time_status & STA_DEL) { time_state = TIME_DEL; - ntp_next_leap_sec = secs + SECS_PER_DAY - - ((secs+1) % SECS_PER_DAY); + div_s64_rem(secs + 1, SECS_PER_DAY, &rem); + ntp_next_leap_sec = secs + SECS_PER_DAY - rem; } break; case TIME_INS: if (!(time_status & STA_INS)) { ntp_next_leap_sec = TIME64_MAX; time_state = TIME_OK; - } else if (secs % SECS_PER_DAY == 0) { + } else if (secs == ntp_next_leap_sec) { leap = -1; time_state = TIME_OOP; printk(KERN_NOTICE @@ -431,7 +433,7 @@ int second_overflow(unsigned long secs) if (!(time_status & STA_DEL)) { ntp_next_leap_sec = TIME64_MAX; time_state = TIME_OK; - } else if ((secs + 1) % SECS_PER_DAY == 0) { + } else if (secs == ntp_next_leap_sec) { leap = 1; ntp_next_leap_sec = TIME64_MAX; time_state = TIME_WAIT; diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index af924470eac0..d8a7c11fa71a 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -6,7 +6,7 @@ extern void ntp_clear(void); /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ extern u64 ntp_tick_length(void); extern ktime_t ntp_get_next_leap(void); -extern int second_overflow(unsigned long secs); +extern int second_overflow(time64_t secs); extern int ntp_validate_timex(struct timex *); extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *); extern void __hardpps(const struct timespec64 *, const struct timespec64 *); -- cgit v1.2.3 From ec02b076ceab63f99e5b3d80fd223d777266c236 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 3 Dec 2015 10:23:30 -0800 Subject: timekeeping: Cap adjustments so they don't exceed the maxadj value Thus its been occasionally noted that users have seen confusing warnings like: Adjusting tsc more than 11% (5941981 vs 7759439) We try to limit the maximum total adjustment to 11% (10% tick adjustment + 0.5% frequency adjustment). But this is done by bounding the requested adjustment values, and the internal steering that is done by tracking the error from what was requested and what was applied, does not have any such limits. This is usually not problematic, but in some cases has a risk that an adjustment could cause the clocksource mult value to overflow, so its an indication things are outside of what is expected. It ends up most of the reports of this 11% warning are on systems using chrony, which utilizes the adjtimex() ADJ_TICK interface (which allows a +-10% adjustment). The original rational for ADJ_TICK unclear to me but my assumption it was originally added to allow broken systems to get a big constant correction at boot (see adjtimex userspace package for an example) which would allow the system to work w/ ntpd's 0.5% adjustment limit. Chrony uses ADJ_TICK to make very aggressive short term corrections (usually right at startup). Which push us close enough to the max bound that a few late ticks can cause the internal steering to push past the max adjust value (tripping the warning). Thus this patch adds some extra logic to enforce the max adjustment cap in the internal steering. Note: This has the potential to slow corrections when the ADJ_TICK value is furthest away from the default value. So it would be good to get some testing from folks using chrony, to make sure we don't cause any troubles there. Cc: Miroslav Lichvar Cc: Thomas Gleixner Cc: Richard Cochran Cc: Prarit Bhargava Cc: Andy Lutomirski Tested-by: Miroslav Lichvar Reported-by: Andy Lutomirski Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 21cc23918cbd..34b4cedfa80d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1604,9 +1604,12 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, { s64 interval = tk->cycle_interval; s64 xinterval = tk->xtime_interval; + u32 base = tk->tkr_mono.clock->mult; + u32 max = tk->tkr_mono.clock->maxadj; + u32 cur_adj = tk->tkr_mono.mult; s64 tick_error; bool negative; - u32 adj; + u32 adj_scale; /* Remove any current error adj from freq calculation */ if (tk->ntp_err_mult) @@ -1625,13 +1628,33 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, /* preserve the direction of correction */ negative = (tick_error < 0); - /* Sort out the magnitude of the correction */ + /* If any adjustment would pass the max, just return */ + if (negative && (cur_adj - 1) <= (base - max)) + return; + if (!negative && (cur_adj + 1) >= (base + max)) + return; + /* + * Sort out the magnitude of the correction, but + * avoid making so large a correction that we go + * over the max adjustment. + */ + adj_scale = 0; tick_error = abs(tick_error); - for (adj = 0; tick_error > interval; adj++) + while (tick_error > interval) { + u32 adj = 1 << (adj_scale + 1); + + /* Check if adjustment gets us within 1 unit from the max */ + if (negative && (cur_adj - adj) <= (base - max)) + break; + if (!negative && (cur_adj + adj) >= (base + max)) + break; + + adj_scale++; tick_error >>= 1; + } /* scale the corrections */ - timekeeping_apply_adjustment(tk, offset, negative, adj); + timekeeping_apply_adjustment(tk, offset, negative, adj_scale); } /* -- cgit v1.2.3