summaryrefslogtreecommitdiffstats
path: root/arch/x86/vdso
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2015-04-23 13:20:18 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2015-04-27 15:49:30 +0200
commit73459e2a1ada09a68c02cc5b73f3116fc8194b3d (patch)
tree2d85beb72d2ee970926f65554099764e2cdc1f9c /arch/x86/vdso
parent5dca0d9147458be9b9363b8a484aa77d710b412a (diff)
downloadlinux-73459e2a1ada09a68c02cc5b73f3116fc8194b3d.tar.bz2
x86: pvclock: Really remove the sched notifier for cross-cpu migrations
This reverts commits 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 and 80f7fdb1c7f0f9266421f823964fd1962681f6ce. The task migration notifier was originally introduced in order to support the pvclock vsyscall with non-synchronized TSC, but KVM only supports it with synchronized TSC. Hence, on KVM the race condition is only needed due to a bad implementation on the host side, and even then it's so rare that it's mostly theoretical. As far as KVM is concerned it's possible to fix the host, avoiding the additional complexity in the vDSO and the (re)introduction of the task migration notifier. Xen, on the other hand, hasn't yet implemented vsyscall support at all, so we do not care about its plans for non-synchronized TSC. Reported-by: Peter Zijlstra <peterz@infradead.org> Suggested-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/vdso')
-rw-r--r--arch/x86/vdso/vclock_gettime.c34
1 files changed, 15 insertions, 19 deletions
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 40d2473836c9..9793322751e0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode)
cycle_t ret;
u64 last;
u32 version;
- u32 migrate_count;
u8 flags;
unsigned cpu, cpu1;
/*
- * When looping to get a consistent (time-info, tsc) pair, we
- * also need to deal with the possibility we can switch vcpus,
- * so make sure we always re-fetch time-info for the current vcpu.
+ * Note: hypervisor must guarantee that:
+ * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
+ * 2. that per-CPU pvclock time info is updated if the
+ * underlying CPU changes.
+ * 3. that version is increased whenever underlying CPU
+ * changes.
+ *
*/
do {
cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode)
* __getcpu() calls (Gleb).
*/
- /* Make sure migrate_count will change if we leave the VCPU. */
- do {
- pvti = get_pvti(cpu);
- migrate_count = pvti->migrate_count;
-
- cpu1 = cpu;
- cpu = __getcpu() & VGETCPU_CPU_MASK;
- } while (unlikely(cpu != cpu1));
+ pvti = get_pvti(cpu);
version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
/*
* Test we're still on the cpu as well as the version.
- * - We must read TSC of pvti's VCPU.
- * - KVM doesn't follow the versioning protocol, so data could
- * change before version if we left the VCPU.
+ * We could have been migrated just after the first
+ * vgetcpu but before fetching the version, so we
+ * wouldn't notice a version change.
*/
- smp_rmb();
- } while (unlikely((pvti->pvti.version & 1) ||
- pvti->pvti.version != version ||
- pvti->migrate_count != migrate_count));
+ cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+ } while (unlikely(cpu != cpu1 ||
+ (pvti->pvti.version & 1) ||
+ pvti->pvti.version != version));
if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;