diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-08 11:06:29 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-08 11:06:29 -0700 |
commit | 927ba67a63c72ee87d655e30183d1576c3717d3e (patch) | |
tree | 85f941aeac5581ef6b612c5b5949f2e2896103c5 /arch/x86 | |
parent | 2a1ccd31420a7b1acd6ca37b2bec2d723aa093e4 (diff) | |
parent | 9176ab1b848059a0cd9caf39f0cebaa1b7ec5ec2 (diff) | |
download | linux-927ba67a63c72ee87d655e30183d1576c3717d3e.tar.bz2 |
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner:
"The timer and timekeeping departement delivers:
Core:
- The consolidation of the VDSO code into a generic library including
the conversion of x86 and ARM64. Conversion of ARM and MIPS are en
route through the relevant maintainer trees and should end up in
5.4.
This gets rid of the unnecessary different copies of the same code
and brings all architectures on the same level of VDSO
functionality.
- Make the NTP user space interface more robust by restricting the
TAI offset to prevent undefined behaviour. Includes a selftest.
- Validate user input in the compat settimeofday() syscall to catch
invalid values which would be turned into valid values by a
multiplication overflow
- Consolidate the time accessors
- Small fixes, improvements and cleanups all over the place
Drivers:
- Support for the NXP system counter, TI davinci timer
- Move the Microsoft HyperV clocksource/events code into the
drivers/clocksource directory so it can be shared between x86 and
ARM64.
- Overhaul of the Tegra driver
- Delay timer support for IXP4xx
- Small fixes, improvements and cleanups as usual"
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (71 commits)
time: Validate user input in compat_settimeofday()
timer: Document TIMER_PINNED
clocksource/drivers: Continue making Hyper-V clocksource ISA agnostic
clocksource/drivers: Make Hyper-V clocksource ISA agnostic
MAINTAINERS: Fix Andy's surname and the directory entries of VDSO
hrtimer: Use a bullet for the returns bullet list
arm64: vdso: Fix compilation with clang older than 8
arm64: compat: Fix __arch_get_hw_counter() implementation
arm64: Fix __arch_get_hw_counter() implementation
lib/vdso: Make delta calculation work correctly
MAINTAINERS: Add entry for the generic VDSO library
arm64: compat: No need for pre-ARMv7 barriers on an ARMv8 system
arm64: vdso: Remove unnecessary asm-offsets.c definitions
vdso: Remove superfluous #ifdef __KERNEL__ in vdso/datapage.h
clocksource/drivers/davinci: Add support for clocksource
clocksource/drivers/davinci: Add support for clockevents
clocksource/drivers/tegra: Set up maximum-ticks limit properly
clocksource/drivers/tegra: Cycles can't be 0
clocksource/drivers/tegra: Restore base address before cleanup
clocksource/drivers/tegra: Add verbose definition for 1MHz constant
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 3 | ||||
-rw-r--r-- | arch/x86/entry/vdso/Makefile | 9 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vclock_gettime.c | 256 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vdso.lds.S | 2 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vdso32/vdso32.lds.S | 2 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vdsox32.lds.S | 1 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vma.c | 2 | ||||
-rw-r--r-- | arch/x86/entry/vsyscall/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/entry/vsyscall/vsyscall_gtod.c | 83 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 91 | ||||
-rw-r--r-- | arch/x86/include/asm/hyperv-tlfs.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/mshyperv.h | 81 | ||||
-rw-r--r-- | arch/x86/include/asm/pvclock.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/vdso/gettimeofday.h | 261 | ||||
-rw-r--r-- | arch/x86/include/asm/vdso/vsyscall.h | 44 | ||||
-rw-r--r-- | arch/x86/include/asm/vgtod.h | 75 | ||||
-rw-r--r-- | arch/x86/include/asm/vvar.h | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/pvclock.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/pmu.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 13 |
21 files changed, 409 insertions, 540 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2bbbd4d1ba31..51a98d6eae8e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -17,6 +17,7 @@ config X86_32 select HAVE_DEBUG_STACKOVERFLOW select MODULES_USE_ELF_REL select OLD_SIGACTION + select GENERIC_VDSO_32 config X86_64 def_bool y @@ -121,6 +122,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI @@ -202,6 +204,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER + select HAVE_GENERIC_VDSO select HOTPLUG_SMT if SMP select IRQ_FORCED_THREADING select NEED_SG_DMA_LENGTH diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 42fe42e82baf..39106111be86 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,6 +3,12 @@ # Building vDSO images for x86. # +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| +ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE +include $(srctree)/lib/vdso/Makefile + KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n @@ -51,6 +57,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) + $(call if_changed,vdso_check) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c @@ -121,6 +128,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE $(call if_changed,vdso) + $(call if_changed,vdso_check) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 @@ -160,6 +168,7 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/system_call.o \ $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) + $(call if_changed,vdso_check) # # The DSO images are built using a special linker script. diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 4aed41f638bb..d9ff616bb0f6 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -1,251 +1,85 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright 2006 Andi Kleen, SUSE Labs. - * * Fast user context implementation of clock_gettime, gettimeofday, and time. * + * Copyright 2006 Andi Kleen, SUSE Labs. + * Copyright 2019 ARM Limited + * * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * The code should have no internal unresolved relocations. - * Check with readelf after changing. */ - -#include <uapi/linux/time.h> -#include <asm/vgtod.h> -#include <asm/vvar.h> -#include <asm/unistd.h> -#include <asm/msr.h> -#include <asm/pvclock.h> -#include <asm/mshyperv.h> -#include <linux/math64.h> #include <linux/time.h> #include <linux/kernel.h> +#include <linux/types.h> -#define gtod (&VVAR(vsyscall_gtod_data)) +#include "../../../../lib/vdso/gettimeofday.c" -extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); -extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); +extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); -#ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifndef BUILD_VDSO32 - -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { - long ret; - asm ("syscall" : "=a" (ret), "=m" (*ts) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : - "rcx", "r11"); - return ret; + return __cvdso_gettimeofday(tv, tz); } -#else +int gettimeofday(struct __kernel_old_timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +time_t __vdso_time(time_t *t) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*ts) - : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) - : "edx"); - return ret; + return __cvdso_time(t); } -#endif +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); -#ifdef CONFIG_PARAVIRT_CLOCK -static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) -{ - return (const struct pvclock_vsyscall_time_info *)&pvclock_page; -} -static notrace u64 vread_pvclock(void) -{ - const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; - u32 version; - u64 ret; - - /* - * Note: The kernel and hypervisor must guarantee that cpu ID - * number maps 1:1 to per-CPU pvclock time info. - * - * Because the hypervisor is entirely unaware of guest userspace - * preemption, it cannot guarantee that per-CPU pvclock time - * info is updated if the underlying CPU changes or that that - * version is increased whenever underlying CPU changes. - * - * On KVM, we are guaranteed that pvti updates for any vCPU are - * atomic as seen by *all* vCPUs. This is an even stronger - * guarantee than we get with a normal seqlock. - * - * On Xen, we don't appear to have that guarantee, but Xen still - * supplies a valid seqlock using the version field. - * - * We only do pvclock vdso timing at all if - * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to - * mean that all vCPUs have matching pvti and that the TSC is - * synced, so we can just look at vCPU 0's pvti. - */ - - do { - version = pvclock_read_begin(pvti); - - if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) - return U64_MAX; - - ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); - } while (pvclock_read_retry(pvti, version)); - - return ret; -} -#endif -#ifdef CONFIG_HYPERV_TSCPAGE -static notrace u64 vread_hvclock(void) -{ - const struct ms_hyperv_tsc_page *tsc_pg = - (const struct ms_hyperv_tsc_page *)&hvclock_page; +#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) +/* both 64-bit and x32 use these */ +extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res); - return hv_read_tsc_page(tsc_pg); -} -#endif - -notrace static inline u64 vgetcyc(int mode) +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { - if (mode == VCLOCK_TSC) - return (u64)rdtsc_ordered(); - - /* - * For any memory-mapped vclock type, we need to make sure that gcc - * doesn't cleverly hoist a load before the mode check. Otherwise we - * might end up touching the memory-mapped page even if the vclock in - * question isn't enabled, which will segfault. Hence the barriers. - */ -#ifdef CONFIG_PARAVIRT_CLOCK - if (mode == VCLOCK_PVCLOCK) { - barrier(); - return vread_pvclock(); - } -#endif -#ifdef CONFIG_HYPERV_TSCPAGE - if (mode == VCLOCK_HVCLOCK) { - barrier(); - return vread_hvclock(); - } -#endif - return U64_MAX; + return __cvdso_clock_gettime(clock, ts); } -notrace static int do_hres(clockid_t clk, struct timespec *ts) -{ - struct vgtod_ts *base = >od->basetime[clk]; - u64 cycles, last, sec, ns; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - cycles = vgetcyc(gtod->vclock_mode); - ns = base->nsec; - last = gtod->cycle_last; - if (unlikely((s64)cycles < 0)) - return vdso_fallback_gettime(clk, ts); - if (cycles > last) - ns += (cycles - last) * gtod->mult; - ns >>= gtod->shift; - sec = base->sec; - } while (unlikely(gtod_read_retry(gtod, seq))); - - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return 0; -} +int clock_gettime(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace static void do_coarse(clockid_t clk, struct timespec *ts) +int __vdso_clock_getres(clockid_t clock, + struct __kernel_timespec *res) { - struct vgtod_ts *base = >od->basetime[clk]; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - ts->tv_sec = base->sec; - ts->tv_nsec = base->nsec; - } while (unlikely(gtod_read_retry(gtod, seq))); + return __cvdso_clock_getres(clock, res); } +int clock_getres(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_getres"))); -notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +#else +/* i386 only */ +extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); +extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res); + +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) { - unsigned int msk; - - /* Sort out negative (CPU/FD) and invalid clocks */ - if (unlikely((unsigned int) clock >= MAX_CLOCKS)) - return vdso_fallback_gettime(clock, ts); - - /* - * Convert the clockid to a bitmask and use it to check which - * clocks are handled in the VDSO directly. - */ - msk = 1U << clock; - if (likely(msk & VGTOD_HRES)) { - return do_hres(clock, ts); - } else if (msk & VGTOD_COARSE) { - do_coarse(clock, ts); - return 0; - } - return vdso_fallback_gettime(clock, ts); + return __cvdso_clock_gettime32(clock, ts); } -int clock_gettime(clockid_t, struct timespec *) +int clock_gettime(clockid_t, struct old_timespec32 *) __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) { - if (likely(tv != NULL)) { - struct timespec *ts = (struct timespec *) tv; - - do_hres(CLOCK_REALTIME, ts); - tv->tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - tz->tz_minuteswest = gtod->tz_minuteswest; - tz->tz_dsttime = gtod->tz_dsttime; - } - - return 0; + return __cvdso_clock_gettime(clock, ts); } -int gettimeofday(struct timeval *, struct timezone *) - __attribute__((weak, alias("__vdso_gettimeofday"))); -/* - * This will break when the xtime seconds get inaccurate, but that is - * unlikely - */ -notrace time_t __vdso_time(time_t *t) -{ - /* This is atomic on x86 so we don't need any locks. */ - time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); +int clock_gettime64(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime64"))); - if (t) - *t = result; - return result; +int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res) +{ + return __cvdso_clock_getres_time32(clock, res); } -time_t time(time_t *t) - __attribute__((weak, alias("__vdso_time"))); + +int clock_getres(clockid_t, struct old_timespec32 *) + __attribute__((weak, alias("__vdso_clock_getres"))); +#endif diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S index d3a2dce4cfa9..36b644e16272 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -25,6 +25,8 @@ VERSION { __vdso_getcpu; time; __vdso_time; + clock_getres; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 422764a81d32..c7720995ab1a 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -26,6 +26,8 @@ VERSION __vdso_clock_gettime; __vdso_gettimeofday; __vdso_time; + __vdso_clock_getres; + __vdso_clock_gettime64; }; LINUX_2.5 { diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S index 05cd1c5c4a15..16a8050a4fb6 100644 --- a/arch/x86/entry/vdso/vdsox32.lds.S +++ b/arch/x86/entry/vdso/vdsox32.lds.S @@ -21,6 +21,7 @@ VERSION { __vdso_gettimeofday; __vdso_getcpu; __vdso_time; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 8db1f594e8b1..349a61d8bf34 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -22,7 +22,7 @@ #include <asm/page.h> #include <asm/desc.h> #include <asm/cpufeature.h> -#include <asm/mshyperv.h> +#include <clocksource/hyperv_timer.h> #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile index 1ac4dd116c26..93c1b3e949a7 100644 --- a/arch/x86/entry/vsyscall/Makefile +++ b/arch/x86/entry/vsyscall/Makefile @@ -2,7 +2,5 @@ # # Makefile for the x86 low level vsyscall code # -obj-y := vsyscall_gtod.o - obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c deleted file mode 100644 index cfcdba082feb..000000000000 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE - * Copyright 2003 Andi Kleen, SuSE Labs. - * - * Modified for x86 32 bit architecture by - * Stefani Seibold <stefani@seibold.net> - * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * Thanks to hpa@transmeta.com for some useful hint. - * Special thanks to Ingo Molnar for his early experience with - * a different vsyscall implementation for Linux/IA32 and for the name. - * - */ - -#include <linux/timekeeper_internal.h> -#include <asm/vgtod.h> -#include <asm/vvar.h> - -int vclocks_used __read_mostly; - -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); - -void update_vsyscall_tz(void) -{ - vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; - vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; -} - -void update_vsyscall(struct timekeeper *tk) -{ - int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; - struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - struct vgtod_ts *base; - u64 nsec; - - /* Mark the new vclock used. */ - BUILD_BUG_ON(VCLOCK_MAX >= 32); - WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); - - gtod_write_begin(vdata); - - /* copy vsyscall data */ - vdata->vclock_mode = vclock_mode; - vdata->cycle_last = tk->tkr_mono.cycle_last; - vdata->mask = tk->tkr_mono.mask; - vdata->mult = tk->tkr_mono.mult; - vdata->shift = tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_REALTIME]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_TAI]; - base->sec = tk->xtime_sec + (s64)tk->tai_offset; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_MONOTONIC]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec; - nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); - while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { - nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; - base->sec++; - } - base->nsec = nsec; - - base = &vdata->basetime[CLOCK_REALTIME_COARSE]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_MONOTONIC_COARSE]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - nsec += tk->wall_to_monotonic.tv_nsec; - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - base->sec++; - } - base->nsec = nsec; - - gtod_write_end(vdata); -} diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1608050e9df9..0e033ef11a9f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,64 +17,13 @@ #include <linux/version.h> #include <linux/vmalloc.h> #include <linux/mm.h> -#include <linux/clockchips.h> #include <linux/hyperv.h> #include <linux/slab.h> #include <linux/cpuhotplug.h> - -#ifdef CONFIG_HYPERV_TSCPAGE - -static struct ms_hyperv_tsc_page *tsc_pg; - -struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return tsc_pg; -} -EXPORT_SYMBOL_GPL(hv_get_tsc_page); - -static u64 read_hv_clock_tsc(struct clocksource *arg) -{ - u64 current_tick = hv_read_tsc_page(tsc_pg); - - if (current_tick == U64_MAX) - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - - return current_tick; -} - -static struct clocksource hyperv_cs_tsc = { - .name = "hyperv_clocksource_tsc_page", - .rating = 400, - .read = read_hv_clock_tsc, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; -#endif - -static u64 read_hv_clock_msr(struct clocksource *arg) -{ - u64 current_tick; - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - return current_tick; -} - -static struct clocksource hyperv_cs_msr = { - .name = "hyperv_clocksource_msr", - .rating = 400, - .read = read_hv_clock_msr, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; +#include <clocksource/hyperv_timer.h> void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); -struct clocksource *hyperv_cs; -EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); @@ -343,42 +292,8 @@ void __init hyperv_init(void) x86_init.pci.arch_init = hv_pci_init; - /* - * Register Hyper-V specific clocksource. - */ -#ifdef CONFIG_HYPERV_TSCPAGE - if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) { - union hv_x64_msr_hypercall_contents tsc_msr; - - tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); - if (!tsc_pg) - goto register_msr_cs; - - hyperv_cs = &hyperv_cs_tsc; - - rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - tsc_msr.enable = 1; - tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); - - wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK; - - clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - return; - } -register_msr_cs: -#endif - /* - * For 32 bit guests just use the MSR based mechanism for reading - * the partition counter. - */ - - hyperv_cs = &hyperv_cs_msr; - if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); - + /* Register Hyper-V specific clocksource */ + hv_init_clocksource(); return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cdf44aa9a501..af78cd72b8f3 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT { #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* + * The Hyper-V TimeRefCount register and the TSC + * page provide a guest VM clock with 100ns tick rate + */ +#define HV_CLOCK_HZ (NSEC_PER_SEC/100) + typedef struct _HV_REFERENCE_TSC_PAGE { __u32 tsc_sequence; __u32 res1; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cc60e617931c..f4fa8a9d5d0b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -105,6 +105,17 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_crash_ctl(val) \ rdmsrl(HV_X64_MSR_CRASH_CTL, val) +#define hv_get_time_ref_count(val) \ + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val) + +#define hv_get_reference_tsc(val) \ + rdmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_reference_tsc(val) \ + wrmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_clocksource_vdso(val) \ + ((val).archdata.vclock_mode = VCLOCK_HVCLOCK) +#define hv_get_raw_timer() rdtsc_ordered() + void hyperv_callback_vector(void); void hyperv_reenlightenment_vector(void); #ifdef CONFIG_TRACING @@ -133,7 +144,6 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) -extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; @@ -387,73 +397,4 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, } #endif /* CONFIG_HYPERV */ -#ifdef CONFIG_HYPERV_TSCPAGE -struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - u64 scale, offset; - u32 sequence; - - /* - * The protocol for reading Hyper-V TSC page is specified in Hypervisor - * Top-Level Functional Specification ver. 3.0 and above. To get the - * reference time we must do the following: - * - READ ReferenceTscSequence - * A special '0' value indicates the time source is unreliable and we - * need to use something else. The currently published specification - * versions (up to 4.0b) contain a mistake and wrongly claim '-1' - * instead of '0' as the special value, see commit c35b82ef0294. - * - ReferenceTime = - * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset - * - READ ReferenceTscSequence again. In case its value has changed - * since our first reading we need to discard ReferenceTime and repeat - * the whole sequence as the hypervisor was updating the page in - * between. - */ - do { - sequence = READ_ONCE(tsc_pg->tsc_sequence); - if (!sequence) - return U64_MAX; - /* - * Make sure we read sequence before we read other values from - * TSC page. - */ - smp_rmb(); - - scale = READ_ONCE(tsc_pg->tsc_scale); - offset = READ_ONCE(tsc_pg->tsc_offset); - *cur_tsc = rdtsc_ordered(); - - /* - * Make sure we read sequence after we read all other values - * from TSC page. - */ - smp_rmb(); - - } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - - return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; -} - -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) -{ - u64 cur_tsc; - - return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); -} - -#else -static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return NULL; -} - -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - BUG(); - return U64_MAX; -} -#endif #endif diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index b6033680d458..19b695ff2c68 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H -#include <linux/clocksource.h> +#include <asm/clocksource.h> #include <asm/pvclock-abi.h> /* some helper functions for xen and kvm pv clock sources */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..ae91429129a6 --- /dev/null +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright (C) 2019 ARM Limited. + * Copyright 2006 Andi Kleen, SUSE Labs. + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <uapi/linux/time.h> +#include <asm/vgtod.h> +#include <asm/vvar.h> +#include <asm/unistd.h> +#include <asm/msr.h> +#include <asm/pvclock.h> +#include <clocksource/hyperv_timer.h> + +#define __vdso_data (VVAR(_vdso_data)) + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +/* + * Declare the memory-mapped vclock data pages. These come from hypervisors. + * If we ever reintroduce something like direct access to an MMIO clock like + * the HPET again, it will go here as well. + * + * A load from any of these pages will segfault if the clock in question is + * disabled, so appropriate compiler barriers and checks need to be used + * to prevent stray loads. + * + * These declarations MUST NOT be const. The compiler will assume that + * an extern const variable has genuinely constant contents, and the + * resulting code won't work, since the whole point is that these pages + * change over time, possibly while we're accessing them. + */ + +#ifdef CONFIG_PARAVIRT_CLOCK +/* + * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO + * if the hypervisor tells us that all vCPUs can get valid data from the + * vCPU 0 page. + */ +extern struct pvclock_vsyscall_time_info pvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +extern struct ms_hyperv_tsc_page hvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifndef BUILD_VDSO32 + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); + + return ret; +} + +static __always_inline +long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +#else + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) + : "memory", "edx"); + + return ret; +} + +static __always_inline long +clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK +static u64 vread_pvclock(void) +{ + const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; + u32 version; + u64 ret; + + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. + * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. + */ + + do { + version = pvclock_read_begin(pvti); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; + + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); + } while (pvclock_read_retry(pvti, version)); + + return ret; +} +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +static u64 vread_hvclock(void) +{ + return hv_read_tsc_page(&hvclock_page); +} +#endif + +static inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + if (clock_mode == VCLOCK_TSC) + return (u64)rdtsc_ordered(); + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ +#ifdef CONFIG_PARAVIRT_CLOCK + if (clock_mode == VCLOCK_PVCLOCK) { + barrier(); + return vread_pvclock(); + } +#endif +#ifdef CONFIG_HYPERV_TSCPAGE + if (clock_mode == VCLOCK_HVCLOCK) { + barrier(); + return vread_hvclock(); + } +#endif + return U64_MAX; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return __vdso_data; +} + +/* + * x86 specific delta calculation. + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @last. If not then use @last, which is the base time of the current + * conversion period. + * + * This variant also removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + if (cycles > last) + return (cycles - last) * mult; + return 0; +} +#define vdso_calc_delta vdso_calc_delta + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0026ab2123ce --- /dev/null +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/hrtimer.h> +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> +#include <asm/vgtod.h> +#include <asm/vvar.h> + +int vclocks_used __read_mostly; + +DEFINE_VVAR(struct vdso_data, _vdso_data); +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__x86_get_k_vdso_data(void) +{ + return _vdso_data; +} +#define __arch_get_k_vdso_data __x86_get_k_vdso_data + +static __always_inline +int __x86_get_clock_mode(struct timekeeper *tk) +{ + int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + + /* Mark the new vclock used. */ + BUILD_BUG_ON(VCLOCK_MAX >= 32); + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); + + return vclock_mode; +} +#define __arch_get_clock_mode __x86_get_clock_mode + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 913a133f8e6f..a2638c6124ed 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -3,7 +3,9 @@ #define _ASM_X86_VGTOD_H #include <linux/compiler.h> -#include <linux/clocksource.h> +#include <asm/clocksource.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> #include <uapi/linux/time.h> @@ -13,81 +15,10 @@ typedef u64 gtod_long_t; typedef unsigned long gtod_long_t; #endif -/* - * There is one of these objects in the vvar page for each - * vDSO-accelerated clockid. For high-resolution clocks, this encodes - * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse - * clocks, this encodes the actual time. - * - * To confuse the reader, for high-resolution clocks, nsec is left-shifted - * by vsyscall_gtod_data.shift. - */ -struct vgtod_ts { - u64 sec; - u64 nsec; -}; - -#define VGTOD_BASES (CLOCK_TAI + 1) -#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) -#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) - -/* - * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time - * so be carefull by modifying this structure. - */ -struct vsyscall_gtod_data { - unsigned int seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - struct vgtod_ts basetime[VGTOD_BASES]; - - int tz_minuteswest; - int tz_dsttime; -}; -extern struct vsyscall_gtod_data vsyscall_gtod_data; - extern int vclocks_used; static inline bool vclock_was_used(int vclock) { return READ_ONCE(vclocks_used) & (1 << vclock); } -static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) -{ - unsigned int ret; - -repeat: - ret = READ_ONCE(s->seq); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - return ret; -} - -static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned int start) -{ - smp_rmb(); - return unlikely(s->seq != start); -} - -static inline void gtod_write_begin(struct vsyscall_gtod_data *s) -{ - ++s->seq; - smp_wmb(); -} - -static inline void gtod_write_end(struct vsyscall_gtod_data *s) -{ - smp_wmb(); - ++s->seq; -} - #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index e474f5c6e387..32f5d9a0b90e 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -32,19 +32,20 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ - extern type vvar_ ## name __attribute__((visibility("hidden"))); + extern type vvar_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); #define VVAR(name) (vvar_ ## name) #define DEFINE_VVAR(type, name) \ - type name \ + type name[CS_BASES] \ __attribute__((section(".vvar_" #name), aligned(16))) __visible #endif /* DECLARE_VVAR(offset, type, name) */ -DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) +DECLARE_VVAR(128, struct vdso_data, _vdso_data) #undef DECLARE_VVAR diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 7df29f08871b..1e5f7a03ddf5 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -17,6 +17,7 @@ #include <linux/irq.h> #include <linux/kexec.h> #include <linux/i8253.h> +#include <linux/random.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); ack_APIC_irq(); exiting_irq(); @@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) { *vector = HYPERV_STIMER0_VECTOR; - *irq = 0; /* Unused on x86/x64 */ + *irq = -1; /* Unused on x86/x64 */ hv_stimer0_handler = handler; return 0; } diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 0ff3e294d0e5..10125358b9c4 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -3,6 +3,7 @@ */ +#include <linux/clocksource.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/notifier.h> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 132d149494d6..ab73a9a639ae 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -261,10 +261,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) ctr_val = rdtsc(); break; case VMWARE_BACKDOOR_PMC_REAL_TIME: - ctr_val = ktime_get_boot_ns(); + ctr_val = ktime_get_boottime_ns(); break; case VMWARE_BACKDOOR_PMC_APPARENT_TIME: - ctr_val = ktime_get_boot_ns() + + ctr_val = ktime_get_boottime_ns() + vcpu->kvm->arch.kvmclock_offset; break; default: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fafd81d2c9ea..63bb1ee8258e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -67,6 +67,7 @@ #include <asm/mshyperv.h> #include <asm/hypervisor.h> #include <asm/intel_pt.h> +#include <clocksource/hyperv_timer.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -1728,7 +1729,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); - ns = ktime_get_boot_ns(); + ns = ktime_get_boottime_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { @@ -2070,7 +2071,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { spin_unlock(&ka->pvclock_gtod_sync_lock); - return ktime_get_boot_ns() + ka->kvmclock_offset; + return ktime_get_boottime_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; @@ -2086,7 +2087,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) &hv_clock.tsc_to_system_mul); ret = __pvclock_read_cycles(&hv_clock, rdtsc()); } else - ret = ktime_get_boot_ns() + ka->kvmclock_offset; + ret = ktime_get_boottime_ns() + ka->kvmclock_offset; put_cpu(); @@ -2185,7 +2186,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) } if (!use_master_clock) { host_tsc = rdtsc(); - kernel_ns = ktime_get_boot_ns(); + kernel_ns = ktime_get_boottime_ns(); } tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); @@ -9015,7 +9016,7 @@ int kvm_arch_hardware_enable(void) * before any KVM threads can be running. Unfortunately, we can't * bring the TSCs fully up to date with real time, as we aren't yet far * enough into CPU bringup that we know how much real time has actually - * elapsed; our helper function, ktime_get_boot_ns() will be using boot + * elapsed; our helper function, ktime_get_boottime_ns() will be using boot * variables that haven't been updated yet. * * So we simply find the maximum observed TSC above, then record the @@ -9243,7 +9244,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.apic_map_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); - kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); + kvm->arch.kvmclock_offset = -ktime_get_boottime_ns(); pvclock_update_vm_gtod_copy(kvm); kvm->arch.guest_can_read_msr_platform_info = true; |