diff options
Diffstat (limited to 'arch/x86/kernel/kvmclock.c')
| -rw-r--r-- | arch/x86/kernel/kvmclock.c | 89 | 
1 files changed, 33 insertions, 56 deletions
| diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 08a30986d472..87edf1ceb1df 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -18,6 +18,7 @@  #include <linux/clocksource.h>  #include <linux/kvm_para.h> +#include <asm/pvclock.h>  #include <asm/arch_hooks.h>  #include <asm/msr.h>  #include <asm/apic.h> @@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)  early_param("no-kvmclock", parse_no_kvmclock);  /* The hypervisor will put information about time periodically here */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); -#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field +static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); +static struct pvclock_wall_clock wall_clock; -static inline u64 kvm_get_delta(u64 last_tsc) -{ -	int cpu = smp_processor_id(); -	u64 delta = native_read_tsc() - last_tsc; -	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; -} - -static struct kvm_wall_clock wall_clock; -static cycle_t kvm_clock_read(void);  /*   * The wallclock is the time of day when we booted. Since then, some time may   * have elapsed since the hypervisor wrote the data. So we try to account for @@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);   */  static unsigned long kvm_get_wallclock(void)  { -	u32 wc_sec, wc_nsec; -	u64 delta; +	struct pvclock_vcpu_time_info *vcpu_time;  	struct timespec ts; -	int version, nsec;  	int low, high;  	low = (int)__pa(&wall_clock);  	high = ((u64)__pa(&wall_clock) >> 32); +	native_write_msr(MSR_KVM_WALL_CLOCK, low, high); -	delta = kvm_clock_read(); +	vcpu_time = &get_cpu_var(hv_clock); +	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); +	put_cpu_var(hv_clock); -	native_write_msr(MSR_KVM_WALL_CLOCK, low, high); -	do { -		version = wall_clock.wc_version; -		rmb(); -		wc_sec = wall_clock.wc_sec; -		wc_nsec = wall_clock.wc_nsec; -		rmb(); -	} while ((wall_clock.wc_version != version) || (version & 1)); - -	delta = kvm_clock_read() - delta; -	delta += wc_nsec; -	nsec = do_div(delta, NSEC_PER_SEC); -	set_normalized_timespec(&ts, wc_sec + delta, nsec); -	/* -	 * Of all mechanisms of time adjustment I've tested, this one -	 * was the champion! -	 */ -	return ts.tv_sec + 1; +	return ts.tv_sec;  }  static int kvm_set_wallclock(unsigned long now)  { -	return 0; +	return -1;  } -/* - * This is our read_clock function. The host puts an tsc timestamp each time - * it updates a new time. Without the tsc adjustment, we can have a situation - * in which a vcpu starts to run earlier (smaller system_time), but probes - * time later (compared to another vcpu), leading to backwards time - */  static cycle_t kvm_clock_read(void)  { -	u64 last_tsc, now; -	int cpu; +	struct pvclock_vcpu_time_info *src; +	cycle_t ret; -	preempt_disable(); -	cpu = smp_processor_id(); - -	last_tsc = get_clock(cpu, tsc_timestamp); -	now = get_clock(cpu, system_time); - -	now += kvm_get_delta(last_tsc); -	preempt_enable(); - -	return now; +	src = &get_cpu_var(hv_clock); +	ret = pvclock_clocksource_read(src); +	put_cpu_var(hv_clock); +	return ret;  } +  static struct clocksource kvm_clock = {  	.name = "kvm-clock",  	.read = kvm_clock_read, @@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {  	.flags = CLOCK_SOURCE_IS_CONTINUOUS,  }; -static int kvm_register_clock(void) +static int kvm_register_clock(char *txt)  {  	int cpu = smp_processor_id();  	int low, high;  	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;  	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); - +	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", +	       cpu, high, low, txt);  	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);  } @@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)  	 * Now that the first cpu already had this clocksource initialized,  	 * we shouldn't fail.  	 */ -	WARN_ON(kvm_register_clock()); +	WARN_ON(kvm_register_clock("secondary cpu clock"));  	/* ok, done with our trickery, call native */  	setup_secondary_APIC_clock();  }  #endif +#ifdef CONFIG_SMP +void __init kvm_smp_prepare_boot_cpu(void) +{ +	WARN_ON(kvm_register_clock("primary cpu clock")); +	native_smp_prepare_boot_cpu(); +} +#endif +  /*   * After the clock is registered, the host will keep writing to the   * registered memory location. If the guest happens to shutdown, this memory @@ -174,7 +148,7 @@ void __init kvmclock_init(void)  		return;  	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { -		if (kvm_register_clock()) +		if (kvm_register_clock("boot clock"))  			return;  		pv_time_ops.get_wallclock = kvm_get_wallclock;  		pv_time_ops.set_wallclock = kvm_set_wallclock; @@ -182,6 +156,9 @@ void __init kvmclock_init(void)  #ifdef CONFIG_X86_LOCAL_APIC  		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;  #endif +#ifdef CONFIG_SMP +		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; +#endif  		machine_ops.shutdown  = kvm_shutdown;  #ifdef CONFIG_KEXEC  		machine_ops.crash_shutdown  = kvm_crash_shutdown; |