From 7f852afe448c95691ead6b57bae5f37562d060b5 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 17 Jan 2018 14:01:28 +0800 Subject: clocksource: Don't walk the clocksource list for empty override If the override clocksource name is empty there is no point in walking the clocksource list for a match. Signed-off-by: Baolin Wang Signed-off-by: Thomas Gleixner Cc: arnd@arndb.de Cc: sboyd@codeaurora.org Cc: broonie@kernel.org Cc: john.stultz@linaro.org Link: https://lkml.kernel.org/r/069ce2a605546bcad6552968cff755f0a03f9f10.1516167691.git.baolin.wang@linaro.org --- kernel/time/clocksource.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 65f9e3f24dde..c5fdcb13200f 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -594,6 +594,9 @@ static void __clocksource_select(bool skipcur) if (!best) return; + if (!strlen(override_name)) + goto found; + /* Check for the override clocksource. */ list_for_each_entry(cs, &clocksource_list, list) { if (skipcur && cs == curr_clocksource) @@ -625,6 +628,7 @@ static void __clocksource_select(bool skipcur) break; } +found: if (curr_clocksource != best && !timekeeping_notify(best)) { pr_info("Switched to clocksource %s\n", best->name); curr_clocksource = best; -- cgit v1.2.3 From e87821d18cf4db19d634a04061c0a1b7eb9c0e65 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 17 Jan 2018 14:01:29 +0800 Subject: clocksource: Use DEVICE_ATTR_RW/RO/WO to define device attributes Convert DEVICE_ATTR to DEVICE_ATTR_RW/RO/WO which is the preferred and simpler way of implementation. Signed-off-by: Baolin Wang Signed-off-by: Thomas Gleixner Cc: arnd@arndb.de Cc: sboyd@codeaurora.org Cc: broonie@kernel.org Cc: john.stultz@linaro.org Link: https://lkml.kernel.org/r/8f35c77e753e957b61187e8e7b2e4a3d61e4a72b.1516167691.git.baolin.wang@linaro.org --- kernel/time/clocksource.c | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c5fdcb13200f..7ce53465782b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -857,16 +857,16 @@ EXPORT_SYMBOL(clocksource_unregister); #ifdef CONFIG_SYSFS /** - * sysfs_show_current_clocksources - sysfs interface for current clocksource + * current_clocksource_show - sysfs interface for current clocksource * @dev: unused * @attr: unused * @buf: char buffer to be filled with clocksource list * * Provides sysfs interface for listing current clocksource. */ -static ssize_t -sysfs_show_current_clocksources(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t current_clocksource_show(struct device *dev, + struct device_attribute *attr, + char *buf) { ssize_t count = 0; @@ -895,7 +895,7 @@ ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt) } /** - * sysfs_override_clocksource - interface for manually overriding clocksource + * current_clocksource_store - interface for manually overriding clocksource * @dev: unused * @attr: unused * @buf: name of override clocksource @@ -904,9 +904,9 @@ ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt) * Takes input from sysfs interface for manually overriding the default * clocksource selection. */ -static ssize_t sysfs_override_clocksource(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t current_clocksource_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { ssize_t ret; @@ -920,9 +920,10 @@ static ssize_t sysfs_override_clocksource(struct device *dev, return ret; } +static DEVICE_ATTR_RW(current_clocksource); /** - * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource + * unbind_clocksource_store - interface for manually unbinding clocksource * @dev: unused * @attr: unused * @buf: unused @@ -930,7 +931,7 @@ static ssize_t sysfs_override_clocksource(struct device *dev, * * Takes input from sysfs interface for manually unbinding a clocksource. */ -static ssize_t sysfs_unbind_clocksource(struct device *dev, +static ssize_t unbind_clocksource_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -954,19 +955,19 @@ static ssize_t sysfs_unbind_clocksource(struct device *dev, return ret ? ret : count; } +static DEVICE_ATTR_WO(unbind_clocksource); /** - * sysfs_show_available_clocksources - sysfs interface for listing clocksource + * available_clocksource_show - sysfs interface for listing clocksource * @dev: unused * @attr: unused * @buf: char buffer to be filled with clocksource list * * Provides sysfs interface for listing registered clocksources */ -static ssize_t -sysfs_show_available_clocksources(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t available_clocksource_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct clocksource *src; ssize_t count = 0; @@ -990,17 +991,7 @@ sysfs_show_available_clocksources(struct device *dev, return count; } - -/* - * Sysfs setup bits: - */ -static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, - sysfs_override_clocksource); - -static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource); - -static DEVICE_ATTR(available_clocksource, 0444, - sysfs_show_available_clocksources, NULL); +static DEVICE_ATTR_RO(available_clocksource); static struct bus_type clocksource_subsys = { .name = "clocksource", -- cgit v1.2.3 From 27263e8dc0f6fe27540a843611ec14a000591c41 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 17 Jan 2018 14:01:30 +0800 Subject: clocksource: Use ATTRIBUTE_GROUPS Use ATTRIBUTE_GROUPS instead of manually creating the individual device files. Signed-off-by: Baolin Wang Signed-off-by: Thomas Gleixner Cc: arnd@arndb.de Cc: sboyd@codeaurora.org Cc: broonie@kernel.org Cc: john.stultz@linaro.org Link: https://lkml.kernel.org/r/d80dccb981dc2461781ebb8d71a32ccdc1b0e6f9.1516167691.git.baolin.wang@linaro.org --- kernel/time/clocksource.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7ce53465782b..0e974cface0b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -993,6 +993,14 @@ static ssize_t available_clocksource_show(struct device *dev, } static DEVICE_ATTR_RO(available_clocksource); +static struct attribute *clocksource_attrs[] = { + &dev_attr_current_clocksource.attr, + &dev_attr_unbind_clocksource.attr, + &dev_attr_available_clocksource.attr, + NULL +}; +ATTRIBUTE_GROUPS(clocksource); + static struct bus_type clocksource_subsys = { .name = "clocksource", .dev_name = "clocksource", @@ -1001,6 +1009,7 @@ static struct bus_type clocksource_subsys = { static struct device device_clocksource = { .id = 0, .bus = &clocksource_subsys, + .groups = clocksource_groups, }; static int __init init_clocksource_sysfs(void) @@ -1009,17 +1018,7 @@ static int __init init_clocksource_sysfs(void) if (!error) error = device_register(&device_clocksource); - if (!error) - error = device_create_file( - &device_clocksource, - &dev_attr_current_clocksource); - if (!error) - error = device_create_file(&device_clocksource, - &dev_attr_unbind_clocksource); - if (!error) - error = device_create_file( - &device_clocksource, - &dev_attr_available_clocksource); + return error; } -- cgit v1.2.3 From c2cda2a5bda9f1369c9d1ab54a20571c13cf2743 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Mar 2018 10:42:47 -0800 Subject: timekeeping/ntp: Don't align NTP frequency adjustments to ticks When the timekeeping multiplier is changed, the NTP error is updated to correct the clock for the delay between the tick and the update of the clock. This error is corrected in later updates and the clock appears as if the frequency was changed exactly on the tick. Remove this correction to keep the point where the frequency is effectively changed at the time of the update. This removes a major source of the NTP error. Signed-off-by: Miroslav Lichvar Signed-off-by: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1520620971-9567-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cd03317e7b57..c1a0ac17336e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1860,8 +1860,6 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, * xtime_nsec_2 = xtime_nsec_1 - offset * Which simplfies to: * xtime_nsec -= offset - * - * XXX - TODO: Doc ntp_error calculation. */ if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { /* NTP adjustment caused clocksource mult overflow */ @@ -1872,7 +1870,6 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, tk->tkr_mono.mult += mult_adj; tk->xtime_interval += interval; tk->tkr_mono.xtime_nsec -= offset; - tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; } /* -- cgit v1.2.3 From 78b98e3c5a66d569a53b8f57b6a698f912794a43 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Mar 2018 10:42:48 -0800 Subject: timekeeping/ntp: Determine the multiplier directly from NTP tick length When the length of the NTP tick changes significantly, e.g. when an NTP/PTP application is correcting the initial offset of the clock, a large value may accumulate in the NTP error before the multiplier converges to the correct value. It may then take a very long time (hours or even days) before the error is corrected. This causes the clock to have an unstable frequency offset, which has a negative impact on the stability of synchronization with precise time sources (e.g. NTP/PTP using hardware timestamping or the PTP KVM clock). Use division to determine the correct multiplier directly from the NTP tick length and replace the iterative approach. This removes the last major source of the NTP error. The only remaining source is now limited resolution of the multiplier, which is corrected by adding 1 to the multiplier when the system clock is behind the NTP time. Signed-off-by: Miroslav Lichvar Signed-off-by: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1520620971-9567-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/timekeeper_internal.h | 2 + kernel/time/timekeeping.c | 138 ++++++++++++------------------------ 2 files changed, 49 insertions(+), 91 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index d315c3d6725c..7acb953298a7 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -117,6 +117,8 @@ struct timekeeper { s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; + /* Flag used to avoid updating NTP twice with same second */ + u32 skip_second_overflow; #ifdef CONFIG_DEBUG_TIMEKEEPING long last_warning; /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c1a0ac17336e..e11760121cb2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -332,6 +332,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) tk->tkr_mono.mult = clock->mult; tk->tkr_raw.mult = clock->mult; tk->ntp_err_mult = 0; + tk->skip_second_overflow = 0; } /* Timekeeper helper functions. */ @@ -1799,20 +1800,19 @@ device_initcall(timekeeping_init_ops); */ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, s64 offset, - bool negative, - int adj_scale) + s32 mult_adj) { s64 interval = tk->cycle_interval; - s32 mult_adj = 1; - if (negative) { - mult_adj = -mult_adj; + if (mult_adj == 0) { + return; + } else if (mult_adj == -1) { interval = -interval; - offset = -offset; + offset = -offset; + } else if (mult_adj != 1) { + interval *= mult_adj; + offset *= mult_adj; } - mult_adj <<= adj_scale; - interval <<= adj_scale; - offset <<= adj_scale; /* * So the following can be confusing. @@ -1873,85 +1873,35 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, } /* - * Calculate the multiplier adjustment needed to match the frequency - * specified by NTP + * Adjust the timekeeper's multiplier to the correct frequency + * and also to reduce the accumulated error value. */ -static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, - s64 offset) +static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { - s64 interval = tk->cycle_interval; - s64 xinterval = tk->xtime_interval; - u32 base = tk->tkr_mono.clock->mult; - u32 max = tk->tkr_mono.clock->maxadj; - u32 cur_adj = tk->tkr_mono.mult; - s64 tick_error; - bool negative; - u32 adj_scale; - - /* Remove any current error adj from freq calculation */ - if (tk->ntp_err_mult) - xinterval -= tk->cycle_interval; - - tk->ntp_tick = ntp_tick_length(); - - /* Calculate current error per tick */ - tick_error = ntp_tick_length() >> tk->ntp_error_shift; - tick_error -= (xinterval + tk->xtime_remainder); - - /* Don't worry about correcting it if its small */ - if (likely((tick_error >= 0) && (tick_error <= interval))) - return; - - /* preserve the direction of correction */ - negative = (tick_error < 0); + u32 mult; - /* If any adjustment would pass the max, just return */ - if (negative && (cur_adj - 1) <= (base - max)) - return; - if (!negative && (cur_adj + 1) >= (base + max)) - return; /* - * Sort out the magnitude of the correction, but - * avoid making so large a correction that we go - * over the max adjustment. + * Determine the multiplier from the current NTP tick length. + * Avoid expensive division when the tick length doesn't change. */ - adj_scale = 0; - tick_error = abs(tick_error); - while (tick_error > interval) { - u32 adj = 1 << (adj_scale + 1); - - /* Check if adjustment gets us within 1 unit from the max */ - if (negative && (cur_adj - adj) <= (base - max)) - break; - if (!negative && (cur_adj + adj) >= (base + max)) - break; - - adj_scale++; - tick_error >>= 1; + if (likely(tk->ntp_tick == ntp_tick_length())) { + mult = tk->tkr_mono.mult - tk->ntp_err_mult; + } else { + tk->ntp_tick = ntp_tick_length(); + mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) - + tk->xtime_remainder, tk->cycle_interval); } - /* scale the corrections */ - timekeeping_apply_adjustment(tk, offset, negative, adj_scale); -} + /* + * If the clock is behind the NTP time, increase the multiplier by 1 + * to catch up with it. If it's ahead and there was a remainder in the + * tick division, the clock will slow down. Otherwise it will stay + * ahead until the tick length changes to a non-divisible value. + */ + tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0; + mult += tk->ntp_err_mult; -/* - * Adjust the timekeeper's multiplier to the correct frequency - * and also to reduce the accumulated error value. - */ -static void timekeeping_adjust(struct timekeeper *tk, s64 offset) -{ - /* Correct for the current frequency error */ - timekeeping_freqadjust(tk, offset); - - /* Next make a small adjustment to fix any cumulative error */ - if (!tk->ntp_err_mult && (tk->ntp_error > 0)) { - tk->ntp_err_mult = 1; - timekeeping_apply_adjustment(tk, offset, 0, 0); - } else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) { - /* Undo any existing error adjustment */ - timekeeping_apply_adjustment(tk, offset, 1, 0); - tk->ntp_err_mult = 0; - } + timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult); if (unlikely(tk->tkr_mono.clock->maxadj && (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) @@ -1968,18 +1918,15 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * in the code above, its possible the required corrective factor to * xtime_nsec could cause it to underflow. * - * Now, since we already accumulated the second, cannot simply roll - * the accumulated second back, since the NTP subsystem has been - * notified via second_overflow. So instead we push xtime_nsec forward - * by the amount we underflowed, and add that amount into the error. - * - * We'll correct this error next time through this function, when - * xtime_nsec is not as small. + * Now, since we have already accumulated the second and the NTP + * subsystem has been notified via second_overflow(), we need to skip + * the next update. */ if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { - s64 neg = -(s64)tk->tkr_mono.xtime_nsec; - tk->tkr_mono.xtime_nsec = 0; - tk->ntp_error += neg << tk->ntp_error_shift; + tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC << + tk->tkr_mono.shift; + tk->xtime_sec--; + tk->skip_second_overflow = 1; } } @@ -2002,6 +1949,15 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) tk->tkr_mono.xtime_nsec -= nsecps; tk->xtime_sec++; + /* + * Skip NTP update if this second was accumulated before, + * i.e. xtime_nsec underflowed in timekeeping_adjust() + */ + if (unlikely(tk->skip_second_overflow)) { + tk->skip_second_overflow = 0; + continue; + } + /* Figure out if its a leap sec and apply if needed */ leap = second_overflow(tk->xtime_sec); if (unlikely(leap)) { @@ -2118,7 +2074,7 @@ void update_wall_time(void) shift--; } - /* correct the clock when NTP error is too big */ + /* Adjust the multiplier to correct NTP error */ timekeeping_adjust(tk, offset); /* -- cgit v1.2.3 From 72199320d49dbafa1a99f94f1cd60dc90035c154 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Mar 2018 17:33:32 +0100 Subject: timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock The planned change to unify the behaviour of the MONOTONIC and BOOTTIME clocks vs. suspend removes the ability to retrieve the active non-suspended time of a system. Provide a new CLOCK_MONOTONIC_ACTIVE clock which returns the active non-suspended time of the system via clock_gettime(). This preserves the old behaviour of CLOCK_MONOTONIC before the BOOTTIME/MONOTONIC unification. This new clock also allows applications to detect programmatically that the MONOTONIC and BOOTTIME clocks are identical. Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165149.965235774@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timekeeper_internal.h | 2 ++ include/linux/timekeeping.h | 1 + include/uapi/linux/time.h | 1 + kernel/time/posix-stubs.c | 2 ++ kernel/time/posix-timers.c | 13 +++++++++++++ kernel/time/timekeeping.c | 36 ++++++++++++++++++++++++++++++++++++ 6 files changed, 55 insertions(+) (limited to 'kernel/time') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 7acb953298a7..4b3dca173e89 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -52,6 +52,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai + * @time_suspended: Accumulated suspend time * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events @@ -94,6 +95,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; + ktime_t time_suspended; s32 tai_offset; unsigned int clock_was_set_seq; u8 cs_was_changed_seq; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b17bcce58bc4..440b1935d3a5 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -32,6 +32,7 @@ extern void getrawmonotonic64(struct timespec64 *ts); extern void ktime_get_ts64(struct timespec64 *ts); extern time64_t ktime_get_seconds(void); extern time64_t ktime_get_real_seconds(void); +extern void ktime_get_active_ts64(struct timespec64 *ts); extern int __getnstimeofday64(struct timespec64 *tv); extern void getnstimeofday64(struct timespec64 *tv); diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 53f8dd84beb5..61a187df8da2 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -61,6 +61,7 @@ struct itimerval { */ #define CLOCK_SGI_CYCLE 10 #define CLOCK_TAI 11 +#define CLOCK_MONOTONIC_ACTIVE 12 #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index b258bee13b02..6259dbc0191a 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -73,6 +73,8 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp) case CLOCK_BOOTTIME: get_monotonic_boottime64(tp); break; + case CLOCK_MONOTONIC_ACTIVE: + ktime_get_active_ts64(tp); default: return -EINVAL; } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 75043046914e..556fe02a47a4 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -263,6 +263,13 @@ static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) return 0; } +static int posix_get_monotonic_active(clockid_t which_clock, + struct timespec64 *tp) +{ + ktime_get_active_ts64(tp); + return 0; +} + static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) { tp->tv_sec = 0; @@ -1330,6 +1337,11 @@ static const struct k_clock clock_boottime = { .timer_arm = common_hrtimer_arm, }; +static const struct k_clock clock_monotonic_active = { + .clock_getres = posix_get_hrtimer_res, + .clock_get = posix_get_monotonic_active, +}; + static const struct k_clock * const posix_clocks[] = { [CLOCK_REALTIME] = &clock_realtime, [CLOCK_MONOTONIC] = &clock_monotonic, @@ -1342,6 +1354,7 @@ static const struct k_clock * const posix_clocks[] = { [CLOCK_REALTIME_ALARM] = &alarm_clock, [CLOCK_BOOTTIME_ALARM] = &alarm_clock, [CLOCK_TAI] = &clock_tai, + [CLOCK_MONOTONIC_ACTIVE] = &clock_monotonic_active, }; static const struct k_clock *clockid_to_kclock(const clockid_t id) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e11760121cb2..a2b7f583e64e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -139,6 +139,9 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { tk->offs_boot = ktime_add(tk->offs_boot, delta); + + /* Accumulate time spent in suspend */ + tk->time_suspended += delta; } /* @@ -886,6 +889,39 @@ void ktime_get_ts64(struct timespec64 *ts) } EXPORT_SYMBOL_GPL(ktime_get_ts64); +/** + * ktime_get_active_ts64 - Get the active non-suspended monotonic clock + * @ts: pointer to timespec variable + * + * The function calculates the monotonic clock from the realtime clock and + * the wall_to_monotonic offset, subtracts the accumulated suspend time and + * stores the result in normalized timespec64 format in the variable + * pointed to by @ts. + */ +void ktime_get_active_ts64(struct timespec64 *ts) +{ + struct timekeeper *tk = &tk_core.timekeeper; + struct timespec64 tomono, tsusp; + u64 nsec, nssusp; + unsigned int seq; + + WARN_ON(timekeeping_suspended); + + do { + seq = read_seqcount_begin(&tk_core.seq); + ts->tv_sec = tk->xtime_sec; + nsec = timekeeping_get_ns(&tk->tkr_mono); + tomono = tk->wall_to_monotonic; + nssusp = tk->time_suspended; + } while (read_seqcount_retry(&tk_core.seq, seq)); + + ts->tv_sec += tomono.tv_sec; + ts->tv_nsec = 0; + timespec64_add_ns(ts, nsec + tomono.tv_nsec); + tsusp = ns_to_timespec64(nssusp); + *ts = timespec64_sub(*ts, tsusp); +} + /** * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC * -- cgit v1.2.3 From d6ed449afdb38f89a7b38ec50e367559e1b8f71f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Mar 2018 17:33:33 +0100 Subject: timekeeping: Make the MONOTONIC clock behave like the BOOTTIME clock The MONOTONIC clock is not fast forwarded by the time spent in suspend on resume. This is only done for the BOOTTIME clock. The reason why the MONOTONIC clock is not forwarded is historical: the original Linux implementation was using jiffies as a base for the MONOTONIC clock and jiffies have never been advanced after resume. At some point when timekeeping was unified in the core code, the MONONOTIC clock was advanced after resume which also advanced jiffies causing interesting side effects. As a consequence the the MONOTONIC clock forwarding was disabled again and the BOOTTIME clock was introduced, which allows to read time since boot. Back then it was not possible to completely distangle the MONOTONIC clock and jiffies because there were still interfaces which exposed the MONOTONIC clock behaviour based on the timer wheel and therefore jiffies. As of today none of the MONOTONIC clock facilities depends on jiffies anymore so the forwarding can be done seperately. This is achieved by forwarding the variables which are used for the jiffies update after resume before the tick is restarted, In timekeeping resume, the change is rather simple. Instead of updating the offset between the MONOTONIC clock and the REALTIME/BOOTTIME clocks, advance the time keeper base for the MONOTONIC and the MONOTONIC_RAW clocks by the time spent in suspend. The MONOTONIC clock is now the same as the BOOTTIME clock and the offset between the REALTIME and the MONOTONIC clocks is the same as before suspend. There might be side effects in applications, which rely on the (unfortunately) well documented behaviour of the MONOTONIC clock, but the downsides of the existing behaviour are probably worse. There is one obvious issue. Up to now it was possible to retrieve the time spent in suspend by observing the delta between the MONOTONIC clock and the BOOTTIME clock. This is not longer available, but the previously introduced mechanism to read the active non-suspended monotonic time can mitigate that in a detectable fashion. Signed-off-by: Thomas Gleixner Cc: Andrew Morton Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165150.062975504@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/tick-common.c | 15 +++++++++++++++ kernel/time/tick-internal.h | 6 ++++++ kernel/time/tick-sched.c | 9 +++++++++ kernel/time/timekeeping.c | 7 ++++--- 4 files changed, 34 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 49edc1c4f3e6..099572ca4a8f 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -419,6 +419,19 @@ void tick_suspend_local(void) clockevents_shutdown(td->evtdev); } +static void tick_forward_next_period(void) +{ + ktime_t delta, now = ktime_get(); + u64 n; + + delta = ktime_sub(now, tick_next_period); + n = ktime_divns(delta, tick_period); + tick_next_period += n * tick_period; + if (tick_next_period < now) + tick_next_period += tick_period; + tick_sched_forward_next_period(); +} + /** * tick_resume_local - Resume the local tick device * @@ -431,6 +444,8 @@ void tick_resume_local(void) struct tick_device *td = this_cpu_ptr(&tick_cpu_device); bool broadcast = tick_resume_check_broadcast(); + tick_forward_next_period(); + clockevents_tick_resume(td->evtdev); if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index e277284c2831..21efab7485ca 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -141,6 +141,12 @@ static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } #endif /* !(BROADCAST && ONESHOT) */ +#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) +extern void tick_sched_forward_next_period(void); +#else +static inline void tick_sched_forward_next_period(void) { } +#endif + /* NO_HZ_FULL internal */ #ifdef CONFIG_NO_HZ_FULL extern void tick_nohz_init(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 29a5733eff83..f53e37b5d248 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -51,6 +51,15 @@ struct tick_sched *tick_get_tick_sched(int cpu) */ static ktime_t last_jiffies_update; +/* + * Called after resume. Make sure that jiffies are not fast forwarded due to + * clock monotonic being forwarded by the suspended time. + */ +void tick_sched_forward_next_period(void) +{ + last_jiffies_update = tick_next_period; +} + /* * Must be called with interrupts disabled ! */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a2b7f583e64e..b509fe7acd64 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -138,7 +138,9 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { - tk->offs_boot = ktime_add(tk->offs_boot, delta); + /* Update both bases so mono and raw stay coupled. */ + tk->tkr_mono.base += delta; + tk->tkr_raw.base += delta; /* Accumulate time spent in suspend */ tk->time_suspended += delta; @@ -1622,7 +1624,6 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, return; } tk_xtime_add(tk, delta); - tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); tk_debug_account_sleep_time(delta); } @@ -2155,7 +2156,7 @@ out: void getboottime64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; - ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); + ktime_t t = ktime_sub(tk->offs_real, tk->time_suspended); *ts = ktime_to_timespec64(t); } -- cgit v1.2.3 From d6c7270e913db75ca5fdc79915ba780e97ae2857 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Mar 2018 17:33:35 +0100 Subject: timekeeping: Remove boot time specific code Now that the MONOTONIC and BOOTTIME clocks are the same, remove all the special handling from timekeeping. Keep wrappers for the existing users of the *boot* timekeeper interfaces. Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165150.236279497@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 42 +++++++++++++++++------------------------- kernel/time/timekeeping.c | 31 ------------------------------- 2 files changed, 17 insertions(+), 56 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 440b1935d3a5..abb396731332 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -38,15 +38,19 @@ extern int __getnstimeofday64(struct timespec64 *tv); extern void getnstimeofday64(struct timespec64 *tv); extern void getboottime64(struct timespec64 *ts); -#define ktime_get_real_ts64(ts) getnstimeofday64(ts) +#define ktime_get_real_ts64(ts) getnstimeofday64(ts) + +/* Clock BOOTTIME compatibility wrappers */ +static inline void get_monotonic_boottime64(struct timespec64 *ts) +{ + ktime_get_ts64(ts); +} /* * ktime_t based interfaces */ - enum tk_offsets { TK_OFFS_REAL, - TK_OFFS_BOOT, TK_OFFS_TAI, TK_OFFS_MAX, }; @@ -57,6 +61,10 @@ extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); extern ktime_t ktime_get_raw(void); extern u32 ktime_get_resolution_ns(void); +/* Clock BOOTTIME compatibility wrappers */ +static inline ktime_t ktime_get_boottime(void) { return ktime_get(); } +static inline u64 ktime_get_boot_ns(void) { return ktime_get(); } + /** * ktime_get_real - get the real (wall-) time in ktime_t format */ @@ -65,17 +73,6 @@ static inline ktime_t ktime_get_real(void) return ktime_get_with_offset(TK_OFFS_REAL); } -/** - * ktime_get_boottime - Returns monotonic time since boot in ktime_t format - * - * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the - * time spent in suspend. - */ -static inline ktime_t ktime_get_boottime(void) -{ - return ktime_get_with_offset(TK_OFFS_BOOT); -} - /** * ktime_get_clocktai - Returns the TAI time of day in ktime_t format */ @@ -102,11 +99,6 @@ static inline u64 ktime_get_real_ns(void) return ktime_to_ns(ktime_get_real()); } -static inline u64 ktime_get_boot_ns(void) -{ - return ktime_to_ns(ktime_get_boottime()); -} - static inline u64 ktime_get_tai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); @@ -119,17 +111,17 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); -extern u64 ktime_get_boot_fast_ns(void); extern u64 ktime_get_real_fast_ns(void); -/* - * timespec64 interfaces utilizing the ktime based ones - */ -static inline void get_monotonic_boottime64(struct timespec64 *ts) +/* Clock BOOTTIME compatibility wrappers */ +static inline u64 ktime_get_boot_fast_ns(void) { - *ts = ktime_to_timespec64(ktime_get_boottime()); + return ktime_get_mono_fast_ns(); } +/* + * timespec64 interfaces utilizing the ktime based ones + */ static inline void timekeeping_clocktai64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_clocktai()); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b509fe7acd64..8355c8803282 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -473,36 +473,6 @@ u64 ktime_get_raw_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); -/** - * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. - * - * To keep it NMI safe since we're accessing from tracing, we're not using a - * separate timekeeper with updates to monotonic clock and boot offset - * protected with seqlocks. This has the following minor side effects: - * - * (1) Its possible that a timestamp be taken after the boot offset is updated - * but before the timekeeper is updated. If this happens, the new boot offset - * is added to the old timekeeping making the clock appear to update slightly - * earlier: - * CPU 0 CPU 1 - * timekeeping_inject_sleeptime64() - * __timekeeping_inject_sleeptime(tk, delta); - * timestamp(); - * timekeeping_update(tk, TK_CLEAR_NTP...); - * - * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be - * partially updated. Since the tk->offs_boot update is a rare event, this - * should be a rare occurrence which postprocessing should be able to handle. - */ -u64 notrace ktime_get_boot_fast_ns(void) -{ - struct timekeeper *tk = &tk_core.timekeeper; - - return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); -} -EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); - - /* * See comment for __ktime_get_fast_ns() vs. timestamp ordering */ @@ -794,7 +764,6 @@ EXPORT_SYMBOL_GPL(ktime_get_resolution_ns); static ktime_t *offsets[TK_OFFS_MAX] = { [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, - [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai, }; -- cgit v1.2.3 From 7250a4047aa6106006c2c9b5aff91d7d3fb77962 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Mar 2018 17:33:36 +0100 Subject: posix-timers: Unify MONOTONIC and BOOTTIME clock behavior Now that the MONOTONIC and BOOTTIME clocks are indentical remove all the special casing. The user space visible interfaces still support both clocks, but their behavior is identical. Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165150.315745557@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/posix-timers.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 556fe02a47a4..8cf95bfee44f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -251,12 +251,6 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 * return 0; } -static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp) -{ - get_monotonic_boottime64(tp); - return 0; -} - static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) { timekeeping_clocktai64(tp); @@ -1322,21 +1316,6 @@ static const struct k_clock clock_tai = { .timer_arm = common_hrtimer_arm, }; -static const struct k_clock clock_boottime = { - .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_get_boottime, - .nsleep = common_nsleep, - .timer_create = common_timer_create, - .timer_set = common_timer_set, - .timer_get = common_timer_get, - .timer_del = common_timer_del, - .timer_rearm = common_hrtimer_rearm, - .timer_forward = common_hrtimer_forward, - .timer_remaining = common_hrtimer_remaining, - .timer_try_to_cancel = common_hrtimer_try_to_cancel, - .timer_arm = common_hrtimer_arm, -}; - static const struct k_clock clock_monotonic_active = { .clock_getres = posix_get_hrtimer_res, .clock_get = posix_get_monotonic_active, @@ -1350,7 +1329,7 @@ static const struct k_clock * const posix_clocks[] = { [CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw, [CLOCK_REALTIME_COARSE] = &clock_realtime_coarse, [CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse, - [CLOCK_BOOTTIME] = &clock_boottime, + [CLOCK_BOOTTIME] = &clock_monotonic, [CLOCK_REALTIME_ALARM] = &alarm_clock, [CLOCK_BOOTTIME_ALARM] = &alarm_clock, [CLOCK_TAI] = &clock_tai, -- cgit v1.2.3 From 127bfa5f4342e63d83a0b07ece376c2e8878e4a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Mar 2018 17:33:37 +0100 Subject: hrtimer: Unify MONOTONIC and BOOTTIME clock behavior Now that th MONOTONIC and BOOTTIME clocks are indentical remove all the special casing. The user space visible interfaces still support both clocks, but their behavior is identical. Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165150.410218515@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 2 -- kernel/time/hrtimer.c | 16 ++-------------- kernel/time/timekeeping.c | 4 +--- kernel/time/timekeeping.h | 1 - 4 files changed, 3 insertions(+), 20 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c7902ca7c9f4..78f456fcd242 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -161,11 +161,9 @@ struct hrtimer_clock_base { enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_REALTIME, - HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, HRTIMER_BASE_MONOTONIC_SOFT, HRTIMER_BASE_REALTIME_SOFT, - HRTIMER_BASE_BOOTTIME_SOFT, HRTIMER_BASE_TAI_SOFT, HRTIMER_MAX_CLOCK_BASES, }; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 23788100e214..9b082ce86325 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -90,11 +90,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, }, - { - .index = HRTIMER_BASE_BOOTTIME, - .clockid = CLOCK_BOOTTIME, - .get_time = &ktime_get_boottime, - }, { .index = HRTIMER_BASE_TAI, .clockid = CLOCK_TAI, @@ -110,11 +105,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, }, - { - .index = HRTIMER_BASE_BOOTTIME_SOFT, - .clockid = CLOCK_BOOTTIME, - .get_time = &ktime_get_boottime, - }, { .index = HRTIMER_BASE_TAI_SOFT, .clockid = CLOCK_TAI, @@ -129,7 +119,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, - [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, + [CLOCK_BOOTTIME] = HRTIMER_BASE_MONOTONIC, [CLOCK_TAI] = HRTIMER_BASE_TAI, }; @@ -565,14 +555,12 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; - ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, - offs_real, offs_boot, offs_tai); + offs_real, offs_tai); base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; - base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; return now; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8355c8803282..ca90219a1e73 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2195,7 +2195,6 @@ void do_timer(unsigned long ticks) * ktime_get_update_offsets_now - hrtimer helper * @cwsseq: pointer to check and store the clock was set sequence number * @offs_real: pointer to storage for monotonic -> realtime offset - * @offs_boot: pointer to storage for monotonic -> boottime offset * @offs_tai: pointer to storage for monotonic -> clock tai offset * * Returns current monotonic time and updates the offsets if the @@ -2205,7 +2204,7 @@ void do_timer(unsigned long ticks) * Called from hrtimer_interrupt() or retrigger_next_event() */ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, - ktime_t *offs_boot, ktime_t *offs_tai) + ktime_t *offs_tai) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; @@ -2222,7 +2221,6 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, if (*cwsseq != tk->clock_was_set_seq) { *cwsseq = tk->clock_was_set_seq; *offs_real = tk->offs_real; - *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; } diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 7a9b4eb7a1d5..79b67f5e0343 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -6,7 +6,6 @@ */ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, - ktime_t *offs_boot, ktime_t *offs_tai); extern int timekeeping_valid_for_hres(void); -- cgit v1.2.3 From a84d1169164b274f13b97a23ff235c000efe3b49 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Mar 2018 17:12:40 +0100 Subject: y2038: Introduce struct __kernel_old_timeval Dealing with 'struct timeval' users in the y2038 series is a bit tricky: We have two definitions of timeval that are visible to user space, one comes from glibc (or some other C library), the other comes from linux/time.h. The kernel copy is what we want to be used for a number of structures defined by the kernel itself, e.g. elf_prstatus (used it core dumps), sysinfo and rusage (used in system calls). These generally tend to be used for passing time intervals rather than absolute (epoch-based) times, so they do not suffer from the y2038 overflow. Some of them could be changed to use 64-bit timestamps by creating new system calls, others like the core files cannot easily be changed. An application using these interfaces likely also uses gettimeofday() or other interfaces that use absolute times, and pass 'struct timeval' pointers directly into kernel interfaces, so glibc must redefine their timeval based on a 64-bit time_t when they introduce their y2038-safe interfaces. The only reasonable way forward I see is to remove the 'timeval' definion from the kernel's uapi headers, and change the interfaces that we do not want to (or cannot) duplicate for 64-bit times to use a new __kernel_old_timeval definition instead. This type should be avoided for all new interfaces (those can use 64-bit nanoseconds, or the 64-bit version of timespec instead), and should be used with great care when converting existing interfaces from timeval, to be sure they don't suffer from the y2038 overflow, and only with consensus for the particular user that using __kernel_old_timeval is better than moving to a 64-bit based interface. The structure name is intentionally chosen to not conflict with user space types, and to be ugly enough to discourage its use. Note that ioctl based interfaces that pass a bare 'timeval' pointer cannot change to '__kernel_old_timeval' because the user space source code refers to 'timeval' instead, and we don't want to modify the user space sources if possible. However, any application that relies on a structure to contain an embedded 'timeval' (e.g. by passing a pointer to the member into a function call that expects a timeval pointer) is broken when that structure gets converted to __kernel_old_timeval. I don't see any way around that, and we have to rely on the compiler to produce a warning or compile failure that will alert users when they recompile their sources against a new libc. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Stephen Boyd Cc: John Stultz Cc: Al Viro Link: https://lkml.kernel.org/r/20180315161739.576085-1-arnd@arndb.de --- include/linux/time32.h | 1 + include/uapi/linux/time.h | 12 ++++++++++++ kernel/time/time.c | 12 ++++++++++++ 3 files changed, 25 insertions(+) (limited to 'kernel/time') diff --git a/include/linux/time32.h b/include/linux/time32.h index 65b1de25198d..d2bcd4377b56 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -217,5 +217,6 @@ static inline s64 timeval_to_ns(const struct timeval *tv) * Returns the timeval representation of the nsec parameter. */ extern struct timeval ns_to_timeval(const s64 nsec); +extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec); #endif diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 61a187df8da2..16a296612ba4 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -42,6 +42,18 @@ struct itimerval { struct timeval it_value; /* current value */ }; +/* + * legacy timeval structure, only embedded in structures that + * traditionally used 'timeval' to pass time intervals (not absolute + * times). Do not add new users. If user space fails to compile + * here, this is probably because it is not y2038 safe and needs to + * be changed to use another interface. + */ +struct __kernel_old_timeval { + __kernel_long_t tv_sec; + __kernel_long_t tv_usec; +}; + /* * The IDs of the various system clocks (for POSIX.1b interval timers): */ diff --git a/kernel/time/time.c b/kernel/time/time.c index bd4e6c7dd689..3044d48ebe56 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -488,6 +488,18 @@ struct timeval ns_to_timeval(const s64 nsec) } EXPORT_SYMBOL(ns_to_timeval); +struct __kernel_old_timeval ns_to_kernel_old_timeval(const s64 nsec) +{ + struct timespec64 ts = ns_to_timespec64(nsec); + struct __kernel_old_timeval tv; + + tv.tv_sec = ts.tv_sec; + tv.tv_usec = (suseconds_t)ts.tv_nsec / 1000; + + return tv; +} +EXPORT_SYMBOL(ns_to_kernel_old_timeval); + /** * set_normalized_timespec - set timespec sec and nsec parts and normalize * -- cgit v1.2.3 From bd03143007eb9b03a7f2316c677780561b68ba2a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Mar 2018 15:29:57 +0200 Subject: alarmtimer: Init nanosleep alarm timer on stack syszbot reported the following debugobjects splat: ODEBUG: object is on stack, but not annotated WARNING: CPU: 0 PID: 4185 at lib/debugobjects.c:328 RIP: 0010:debug_object_is_on_stack lib/debugobjects.c:327 [inline] debug_object_init+0x17/0x20 lib/debugobjects.c:391 debug_hrtimer_init kernel/time/hrtimer.c:410 [inline] debug_init kernel/time/hrtimer.c:458 [inline] hrtimer_init+0x8c/0x410 kernel/time/hrtimer.c:1259 alarm_init kernel/time/alarmtimer.c:339 [inline] alarm_timer_nsleep+0x164/0x4d0 kernel/time/alarmtimer.c:787 SYSC_clock_nanosleep kernel/time/posix-timers.c:1226 [inline] SyS_clock_nanosleep+0x235/0x330 kernel/time/posix-timers.c:1204 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 This happens because the hrtimer for the alarm nanosleep is on stack, but the code does not use the proper debug objects initialization. Split out the code for the allocated use cases and invoke hrtimer_init_on_stack() for the nanosleep related functions. Reported-by: syzbot+a3e0726462b2e346a31d@syzkaller.appspotmail.com Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: syzkaller-bugs@googlegroups.com Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1803261528270.1585@nanos.tec.linutronix.de --- kernel/time/alarmtimer.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index ec09ce9a6012..639321bf2e39 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -326,6 +326,17 @@ static int alarmtimer_resume(struct device *dev) } #endif +static void +__alarm_init(struct alarm *alarm, enum alarmtimer_type type, + enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) +{ + timerqueue_init(&alarm->node); + alarm->timer.function = alarmtimer_fired; + alarm->function = function; + alarm->type = type; + alarm->state = ALARMTIMER_STATE_INACTIVE; +} + /** * alarm_init - Initialize an alarm structure * @alarm: ptr to alarm to be initialized @@ -335,13 +346,9 @@ static int alarmtimer_resume(struct device *dev) void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { - timerqueue_init(&alarm->node); hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, - HRTIMER_MODE_ABS); - alarm->timer.function = alarmtimer_fired; - alarm->function = function; - alarm->type = type; - alarm->state = ALARMTIMER_STATE_INACTIVE; + HRTIMER_MODE_ABS); + __alarm_init(alarm, type, function); } EXPORT_SYMBOL_GPL(alarm_init); @@ -719,6 +726,8 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp, __set_current_state(TASK_RUNNING); + destroy_hrtimer_on_stack(&alarm->timer); + if (!alarm->data) return 0; @@ -740,6 +749,15 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp, return -ERESTART_RESTARTBLOCK; } +static void +alarm_init_on_stack(struct alarm *alarm, enum alarmtimer_type type, + enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) +{ + hrtimer_init_on_stack(&alarm->timer, alarm_bases[type].base_clockid, + HRTIMER_MODE_ABS); + __alarm_init(alarm, type, function); +} + /** * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep * @restart: ptr to restart block @@ -752,7 +770,7 @@ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) ktime_t exp = restart->nanosleep.expires; struct alarm alarm; - alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + alarm_init_on_stack(&alarm, type, alarmtimer_nsleep_wakeup); return alarmtimer_do_nsleep(&alarm, exp, type); } @@ -784,7 +802,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, if (!capable(CAP_WAKE_ALARM)) return -EPERM; - alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + alarm_init_on_stack(&alarm, type, alarmtimer_nsleep_wakeup); exp = timespec64_to_ktime(*tsreq); /* Convert (if necessary) to absolute time */ -- cgit v1.2.3