author     Alexey Makhalov <amakhalov@vmware.com>   2016-10-28 00:54:32 -0700
committer  Thomas Gleixner <tglx@linutronix.de>     2016-10-30 08:57:08 +0100
commit     80e9a4f21fd7ccce7e9b8439986fd028c9946dda (patch)
tree       4e438295f52bacffc2ca2df175cf68c3b2722e44
parent     91d1e54ebd1615d216b7f57324a5e69166a344e0 (diff)
download   linux-80e9a4f21fd7ccce7e9b8439986fd028c9946dda.tar.bz2
x86/vmware: Add paravirt sched clock
The default sched_clock() implementation is native_sched_clock(). It
contains code to handle non-constant frequency TSCs, which creates
overhead for systems with constant frequency TSCs.

The VMware hypervisor guarantees a constant frequency TSC, so
native_sched_clock() is not required and is slower than a dedicated
function that operates with conversion factors calculated once.

Calculate the conversion factors at boot time from the TSC frequency
and install an optimized sched_clock() function via paravirt ops.

The paravirtualized clock can be disabled on the kernel command line
with the new 'no-vmw-sched-clock' option.
Signed-off-by: Alexey Makhalov <amakhalov@vmware.com>
Acked-by: Alok N Kataria <akataria@vmware.com>
Cc: linux-doc@vger.kernel.org
Cc: pv-drivers@vmware.com
Cc: corbet@lwn.net
Cc: virtualization@lists.linux-foundation.org
Link: http://lkml.kernel.org/r/20161028075432.90579-4-amakhalov@vmware.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--  Documentation/kernel-parameters.txt |  4
-rw-r--r--  arch/x86/kernel/cpu/vmware.c        | 42
2 files changed, 46 insertions(+), 0 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 37babf91f2cb..b3b2ec00646f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2754,6 +2754,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
 			fault handling.
 
+	no-vmw-sched-clock
+			[X86,PV_OPS] Disable paravirtualized VMware scheduler
+			clock and use the default one.
+
 	no-steal-acc	[X86,KVM] Disable paravirtualized steal time accounting.
 			steal time is computed, but won't influence scheduler
 			behaviour
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 098a524a1646..cdbe38be28fd 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -24,10 +24,15 @@
 #include <linux/dmi.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/clocksource.h>
 #include <asm/div64.h>
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
 #include <asm/apic.h>
+#include <asm/timer.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"vmware: " fmt
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
 #define VMWARE_HYPERVISOR_MAGIC	0x564D5868
@@ -62,10 +67,47 @@ static unsigned long vmware_get_tsc_khz(void)
 }
 
 #ifdef CONFIG_PARAVIRT
+static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
+static int vmw_sched_clock __initdata = 1;
+
+static __init int setup_vmw_sched_clock(char *s)
+{
+	vmw_sched_clock = 0;
+	return 0;
+}
+early_param("no-vmw-sched-clock", setup_vmw_sched_clock);
+
+static unsigned long long vmware_sched_clock(void)
+{
+	unsigned long long ns;
+
+	ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
+			     vmware_cyc2ns.cyc2ns_shift);
+	ns -= vmware_cyc2ns.cyc2ns_offset;
+	return ns;
+}
+
+static void __init vmware_sched_clock_setup(void)
+{
+	struct cyc2ns_data *d = &vmware_cyc2ns;
+	unsigned long long tsc_now = rdtsc();
+
+	clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
+			       vmware_tsc_khz, NSEC_PER_MSEC, 0);
+	d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
+					   d->cyc2ns_shift);
+
+	pv_time_ops.sched_clock = vmware_sched_clock;
+	pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset);
+}
+
 static void __init vmware_paravirt_ops_setup(void)
 {
 	pv_info.name = "VMware hypervisor";
 	pv_cpu_ops.io_delay = paravirt_nop;
+
+	if (vmware_tsc_khz && vmw_sched_clock)
+		vmware_sched_clock_setup();
 }
 #else
 #define vmware_paravirt_ops_setup() do {} while (0)
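
The commit message above notes that the conversion factors are calculated
once at boot. To illustrate the idea, here is a minimal user-space sketch
of the same cycles-to-nanoseconds scheme: derive a (mult, shift) pair once
from the TSC frequency, then convert cycle counts with a single multiply
and shift, as the patch does with clocks_calc_mult_shift() and
mul_u64_u32_shr(). The 2.5 GHz frequency, the simplified factor search and
the helper names are illustrative assumptions, not the kernel's code, and
the offset handling of vmware_sched_clock() is omitted for brevity.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL

/* One-time conversion factors, analogous to the patch's vmware_cyc2ns. */
static uint32_t cyc2ns_mul;
static uint32_t cyc2ns_shift;

/*
 * Pick the largest shift for which (NSEC_PER_MSEC << shift) / tsc_khz
 * still fits in 32 bits; a rough stand-in for clocks_calc_mult_shift().
 */
static void calc_cyc2ns_factors(uint64_t tsc_khz)
{
	uint32_t shift;

	for (shift = 32; shift > 0; shift--) {
		uint64_t mul = (NSEC_PER_MSEC << shift) / tsc_khz;

		if (mul <= UINT32_MAX) {
			cyc2ns_mul = (uint32_t)mul;
			cyc2ns_shift = shift;
			return;
		}
	}
}

/* ns = (cycles * mul) >> shift, done in 128 bits like mul_u64_u32_shr(). */
static uint64_t cycles_to_ns(uint64_t cycles)
{
	return (uint64_t)(((unsigned __int128)cycles * cyc2ns_mul) >> cyc2ns_shift);
}

int main(void)
{
	calc_cyc2ns_factors(2500000ULL);	/* assumed 2.5 GHz constant-rate TSC */

	/* 2,500,000,000 cycles at 2.5 GHz is one second, i.e. ~1e9 ns. */
	printf("mul=%u shift=%u -> %llu ns\n",
	       cyc2ns_mul, cyc2ns_shift,
	       (unsigned long long)cycles_to_ns(2500000000ULL));
	return 0;
}

The point of the one-time setup is that the hot path, like
vmware_sched_clock() in the patch, is reduced to one read of the TSC, one
multiply, one shift and one subtraction, with no frequency checks.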
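
A short usage note on the new parameter: to fall back to
native_sched_clock() in a VMware guest, 'no-vmw-sched-clock' is appended to
the kernel command line at boot. The exact boot-loader configuration varies;
the following line is an illustrative example only, with made-up kernel and
root-device names:

	linux /boot/vmlinuz root=/dev/sda1 ro no-vmw-sched-clock

Because the option is registered with early_param() and parsed during early
setup, vmw_sched_clock is already cleared by the time
vmware_paravirt_ops_setup() runs, so vmware_sched_clock_setup() is skipped
entirely.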