diff options
author | Radim Krčmář <rkrcmar@redhat.com> | 2018-05-26 13:45:49 +0200 |
---|---|---|
committer | Radim Krčmář <rkrcmar@redhat.com> | 2018-05-26 13:45:49 +0200 |
commit | f33ecec9bb5199c5a4dd296af604f70273d2636e (patch) | |
tree | ba12665fe8f7952a64013bef9125bfac27ff1fda /arch/x86 | |
parent | 0ea3286e2df74c9ec2fadbf91170cd3edd14e3e5 (diff) | |
parent | 2d2ccf24939cf369f7473c7e4ea309891be91848 (diff) | |
download | linux-f33ecec9bb5199c5a4dd296af604f70273d2636e.tar.bz2 |
Merge branch 'x86/hyperv' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
To resolve conflicts with the PV TLB flush series.
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/entry/vdso/vdso32/vdso-fakesections.c | 1 | ||||
-rw-r--r-- | arch/x86/events/core.c | 8 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 2 | ||||
-rw-r--r-- | arch/x86/events/msr.c | 9 | ||||
-rw-r--r-- | arch/x86/hyperv/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_apic.c | 256 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 32 | ||||
-rw-r--r-- | arch/x86/hyperv/mmu.c | 75 | ||||
-rw-r--r-- | arch/x86/include/asm/hyperv-tlfs.h | 30 | ||||
-rw-r--r-- | arch/x86/include/asm/mshyperv.h | 44 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/kvm_para.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/amd_nb.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_hvm.c | 13 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 4 | ||||
-rw-r--r-- | arch/x86/xen/mmu_pv.c | 4 |
17 files changed, 410 insertions, 89 deletions
diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c deleted file mode 100644 index 541468e25265..000000000000 --- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso-fakesections.c" diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a6006e7bb729..45b2b1c93d04 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -27,6 +27,7 @@ #include <linux/cpu.h> #include <linux/bitops.h> #include <linux/device.h> +#include <linux/nospec.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) config = attr->config; - cache_type = (config >> 0) & 0xff; + cache_type = (config >> 0) & 0xff; if (cache_type >= PERF_COUNT_HW_CACHE_MAX) return -EINVAL; + cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); cache_op = (config >> 8) & 0xff; if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) return -EINVAL; + cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); cache_result = (config >> 16) & 0xff; if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); val = hw_cache_event_ids[cache_type][cache_op][cache_result]; @@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event) if (attr->config >= x86_pmu.max_events) return -EINVAL; + attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); + /* * The generic map: */ diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9aca448bb8e6..9f8084f18d58 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -92,6 +92,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> +#include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "../perf_event.h" @@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event) } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index e7edf19e64c2..b4771a6ddbc1 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/perf_event.h> +#include <linux/nospec.h> #include <asm/intel-family.h> enum perf_msr_id { @@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (cfg >= PERF_MSR_EVENT_MAX) - return -EINVAL; - /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || @@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (cfg >= PERF_MSR_EVENT_MAX) + return -EINVAL; + + cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); + if (!msr[cfg].attr) return -EINVAL; diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 367a8203cfcf..b173d404e3df 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1 +1,2 @@ -obj-y := hv_init.o mmu.o +obj-y := hv_init.o mmu.o +obj-$(CONFIG_X86_64) += hv_apic.o diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c new file mode 100644 index 000000000000..f68855499391 --- /dev/null +++ b/arch/x86/hyperv/hv_apic.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific APIC code. + * + * Copyright (C) 2018, Microsoft, Inc. + * + * Author : K. Y. Srinivasan <kys@microsoft.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + */ + +#include <linux/types.h> +#include <linux/version.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/clockchips.h> +#include <linux/hyperv.h> +#include <linux/slab.h> +#include <linux/cpuhotplug.h> +#include <asm/hypervisor.h> +#include <asm/mshyperv.h> +#include <asm/apic.h> + +static struct apic orig_apic; + +static u64 hv_apic_icr_read(void) +{ + u64 reg_val; + + rdmsrl(HV_X64_MSR_ICR, reg_val); + return reg_val; +} + +static void hv_apic_icr_write(u32 low, u32 id) +{ + u64 reg_val; + + reg_val = SET_APIC_DEST_FIELD(id); + reg_val = reg_val << 32; + reg_val |= low; + + wrmsrl(HV_X64_MSR_ICR, reg_val); +} + +static u32 hv_apic_read(u32 reg) +{ + u32 reg_val, hi; + + switch (reg) { + case APIC_EOI: + rdmsr(HV_X64_MSR_EOI, reg_val, hi); + return reg_val; + case APIC_TASKPRI: + rdmsr(HV_X64_MSR_TPR, reg_val, hi); + return reg_val; + + default: + return native_apic_mem_read(reg); + } +} + +static void hv_apic_write(u32 reg, u32 val) +{ + switch (reg) { + case APIC_EOI: + wrmsr(HV_X64_MSR_EOI, val, 0); + break; + case APIC_TASKPRI: + wrmsr(HV_X64_MSR_TPR, val, 0); + break; + default: + native_apic_mem_write(reg, val); + } +} + +static void hv_apic_eoi_write(u32 reg, u32 val) +{ + wrmsr(HV_X64_MSR_EOI, val, 0); +} + +/* + * IPI implementation on Hyper-V. + */ +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) +{ + struct ipi_arg_ex **arg; + struct ipi_arg_ex *ipi_arg; + unsigned long flags; + int nr_bank = 0; + int ret = 1; + + local_irq_save(flags); + arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_ex_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->vp_set.valid_bank_mask = 0; + + if (!cpumask_equal(mask, cpu_present_mask)) { + ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + } + if (!nr_bank) + ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; + + ret = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, + ipi_arg, NULL); + +ipi_mask_ex_done: + local_irq_restore(flags); + return ((ret == 0) ? true : false); +} + +static bool __send_ipi_mask(const struct cpumask *mask, int vector) +{ + int cur_cpu, vcpu; + struct ipi_arg_non_ex **arg; + struct ipi_arg_non_ex *ipi_arg; + int ret = 1; + unsigned long flags; + + if (cpumask_empty(mask)) + return true; + + if (!hv_hypercall_pg) + return false; + + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + return false; + + if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + return __send_ipi_mask_ex(mask, vector); + + local_irq_save(flags); + arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->cpu_mask = 0; + + for_each_cpu(cur_cpu, mask) { + vcpu = hv_cpu_number_to_vp_number(cur_cpu); + /* + * This particular version of the IPI hypercall can + * only target upto 64 CPUs. + */ + if (vcpu >= 64) + goto ipi_mask_done; + + __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask); + } + + ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL); + +ipi_mask_done: + local_irq_restore(flags); + return ((ret == 0) ? true : false); +} + +static bool __send_ipi_one(int cpu, int vector) +{ + struct cpumask mask = CPU_MASK_NONE; + + cpumask_set_cpu(cpu, &mask); + return __send_ipi_mask(&mask, vector); +} + +static void hv_send_ipi(int cpu, int vector) +{ + if (!__send_ipi_one(cpu, vector)) + orig_apic.send_IPI(cpu, vector); +} + +static void hv_send_ipi_mask(const struct cpumask *mask, int vector) +{ + if (!__send_ipi_mask(mask, vector)) + orig_apic.send_IPI_mask(mask, vector); +} + +static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned int this_cpu = smp_processor_id(); + struct cpumask new_mask; + const struct cpumask *local_mask; + + cpumask_copy(&new_mask, mask); + cpumask_clear_cpu(this_cpu, &new_mask); + local_mask = &new_mask; + if (!__send_ipi_mask(local_mask, vector)) + orig_apic.send_IPI_mask_allbutself(mask, vector); +} + +static void hv_send_ipi_allbutself(int vector) +{ + hv_send_ipi_mask_allbutself(cpu_online_mask, vector); +} + +static void hv_send_ipi_all(int vector) +{ + if (!__send_ipi_mask(cpu_online_mask, vector)) + orig_apic.send_IPI_all(vector); +} + +static void hv_send_ipi_self(int vector) +{ + if (!__send_ipi_one(smp_processor_id(), vector)) + orig_apic.send_IPI_self(vector); +} + +void __init hv_apic_init(void) +{ + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { + if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + pr_info("Hyper-V: Using ext hypercalls for IPI\n"); + else + pr_info("Hyper-V: Using IPI hypercalls\n"); + /* + * Set the IPI entry points. + */ + orig_apic = *apic; + + apic->send_IPI = hv_send_ipi; + apic->send_IPI_mask = hv_send_ipi_mask; + apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself; + apic->send_IPI_allbutself = hv_send_ipi_allbutself; + apic->send_IPI_all = hv_send_ipi_all; + apic->send_IPI_self = hv_send_ipi_self; + } + + if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { + pr_info("Hyper-V: Using MSR based APIC access\n"); + apic_set_eoi_write(hv_apic_eoi_write); + apic->read = hv_apic_read; + apic->write = hv_apic_write; + apic->icr_write = hv_apic_icr_write; + apic->icr_read = hv_apic_icr_read; + } +} diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index cfecc2272f2d..4c431e1c1eff 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -91,12 +91,19 @@ EXPORT_SYMBOL_GPL(hv_vp_index); struct hv_vp_assist_page **hv_vp_assist_page; EXPORT_SYMBOL_GPL(hv_vp_assist_page); +void __percpu **hyperv_pcpu_input_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); + u32 hv_max_vp_index; static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + void **input_arg; + + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + *input_arg = page_address(alloc_page(GFP_KERNEL)); hv_get_vp_index(msr_vp_index); @@ -217,6 +224,16 @@ static int hv_cpu_die(unsigned int cpu) { struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; + unsigned long flags; + void **input_arg; + void *input_pg = NULL; + + local_irq_save(flags); + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + input_pg = *input_arg; + *input_arg = NULL; + local_irq_restore(flags); + free_page((unsigned long)input_pg); if (hv_vp_assist_page && hv_vp_assist_page[cpu]) wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); @@ -242,8 +259,9 @@ static int hv_cpu_die(unsigned int cpu) * * 1. Setup the hypercall page. * 2. Register Hyper-V specific clocksource. + * 3. Setup Hyper-V specific APIC entry points. */ -void hyperv_init(void) +void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; @@ -259,6 +277,16 @@ void hyperv_init(void) if ((ms_hyperv.features & required_msrs) != required_msrs) return; + /* + * Allocate the per-CPU state for the hypercall input arg. + * If this allocation fails, we will not be able to setup + * (per-CPU) hypercall input page and thus this failure is + * fatal on Hyper-V. + */ + hyperv_pcpu_input_arg = alloc_percpu(void *); + + BUG_ON(hyperv_pcpu_input_arg == NULL); + /* Allocate percpu VP index */ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), GFP_KERNEL); @@ -296,7 +324,7 @@ void hyperv_init(void) hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - hyper_alloc_mmu(); + hv_apic_init(); /* * Register Hyper-V specific clocksource. diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 56c9ebac946f..5f053d7d1bd9 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -25,20 +25,13 @@ struct hv_flush_pcpu { struct hv_flush_pcpu_ex { u64 address_space; u64 flags; - struct { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; - } hv_vp_set; + struct hv_vpset hv_vp_set; u64 gva_list[]; }; /* Each gva in gva_list encodes up to 4096 pages to flush */ #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) -static struct hv_flush_pcpu __percpu **pcpu_flush; - -static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex; /* * Fills in gva_list starting from offset. Returns the number of items added. @@ -70,41 +63,6 @@ static inline int fill_gva_list(u64 gva_list[], int offset, return gva_n - offset; } -/* Return the number of banks in the resulting vp_set */ -static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush, - const struct cpumask *cpus) -{ - int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; - - /* valid_bank_mask can represent up to 64 banks */ - if (hv_max_vp_index / 64 >= 64) - return 0; - - /* - * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex - * structs are not cleared between calls, we risk flushing unneeded - * vCPUs otherwise. - */ - for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) - flush->hv_vp_set.bank_contents[vcpu_bank] = 0; - - /* - * Some banks may end up being empty but this is acceptable. - */ - for_each_cpu(cpu, cpus) { - vcpu = hv_cpu_number_to_vp_number(cpu); - vcpu_bank = vcpu / 64; - vcpu_offset = vcpu % 64; - __set_bit(vcpu_offset, (unsigned long *) - &flush->hv_vp_set.bank_contents[vcpu_bank]); - if (vcpu_bank >= nr_bank) - nr_bank = vcpu_bank + 1; - } - flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); - - return nr_bank; -} - static void hyperv_flush_tlb_others(const struct cpumask *cpus, const struct flush_tlb_info *info) { @@ -116,7 +74,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, trace_hyperv_mmu_flush_tlb_others(cpus, info); - if (!pcpu_flush || !hv_hypercall_pg) + if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) @@ -124,10 +82,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = this_cpu_ptr(pcpu_flush); - - if (unlikely(!*flush_pcpu)) - *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + flush_pcpu = (struct hv_flush_pcpu **) + this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -203,7 +159,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, trace_hyperv_mmu_flush_tlb_others(cpus, info); - if (!pcpu_flush_ex || !hv_hypercall_pg) + if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) @@ -211,10 +167,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = this_cpu_ptr(pcpu_flush_ex); - - if (unlikely(!*flush_pcpu)) - *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + flush_pcpu = (struct hv_flush_pcpu_ex **) + this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -239,8 +193,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, flush->hv_vp_set.valid_bank_mask = 0; if (!cpumask_equal(cpus, cpu_present_mask)) { - flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K; - nr_bank = cpumask_to_vp_set(flush, cpus); + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); } if (!nr_bank) { @@ -296,14 +250,3 @@ void hyperv_setup_mmu_ops(void) pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex; } } - -void hyper_alloc_mmu(void) -{ - if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) - return; - - if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) - pcpu_flush = alloc_percpu(struct hv_flush_pcpu *); - else - pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *); -} diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index a8897615354e..ff3f56bdd5d7 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -164,6 +164,11 @@ */ #define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9) +/* + * Recommend using cluster IPI hypercalls. + */ +#define HV_X64_CLUSTER_IPI_RECOMMENDED (1 << 10) + /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) @@ -332,12 +337,17 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_IPI_LOW_VECTOR 0x10 +#define HV_IPI_HIGH_VECTOR 0xff + /* Declare the various hypercall operations. */ #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_SEND_IPI 0x000b #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 +#define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d @@ -363,7 +373,7 @@ struct hv_tsc_emulation_status { #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARCE_4K, + HV_GENERIC_SET_SPARSE_4K, HV_GENERIC_SET_ALL, }; @@ -713,4 +723,22 @@ struct hv_enlightened_vmcs { #define HV_STIMER_AUTOENABLE (1ULL << 3) #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) +struct ipi_arg_non_ex { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +struct ipi_arg_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index b90e79610cf7..997192131b7b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -122,6 +122,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; +extern void __percpu **hyperv_pcpu_input_arg; static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { @@ -258,9 +259,41 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } -void hyperv_init(void); +static inline int cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + + /* valid_bank_mask can represent up to 64 banks */ + if (hv_max_vp_index / 64 >= 64) + return 0; + + /* + * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex + * structs are not cleared between calls, we risk flushing unneeded + * vCPUs otherwise. + */ + for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) + vpset->bank_contents[vcpu_bank] = 0; + + /* + * Some banks may end up being empty but this is acceptable. + */ + for_each_cpu(cpu, cpus) { + vcpu = hv_cpu_number_to_vp_number(cpu); + vcpu_bank = vcpu / 64; + vcpu_offset = vcpu % 64; + __set_bit(vcpu_offset, (unsigned long *) + &vpset->bank_contents[vcpu_bank]); + if (vcpu_bank >= nr_bank) + nr_bank = vcpu_bank + 1; + } + vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); + return nr_bank; +} + +void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); bool hv_is_hyperv_initialized(void); void hyperv_cleanup(void); @@ -269,6 +302,13 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); + +#ifdef CONFIG_X86_64 +void hv_apic_init(void); +#else +static inline void hv_apic_init(void) {} +#endif + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 4c851ebb3ceb..0ede697c3961 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,7 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 -#define KVM_HINTS_DEDICATED 0 +#define KVM_HINTS_REALTIME 0 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c88e0b127810..b481b95bd8f6 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -14,8 +14,11 @@ #include <asm/amd_nb.h> #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -24,6 +27,7 @@ static u32 *flush_words; static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, {} }; @@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, {} }; @@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7867417cfaff..5b2300b818af 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void) static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) { native_smp_prepare_cpus(max_cpus); - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) static_branch_disable(&virt_spin_lock_key); } @@ -553,7 +553,7 @@ static void __init kvm_guest_init(void) } if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; @@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void) int cpu; if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), @@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) return; __pv_init_lock_hash(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba55be9b5c27..b7bf9ac9b6d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); static bool __read_mostly report_ignored_msrs = true; module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); -unsigned int min_timer_period_us = 500; +unsigned int min_timer_period_us = 200; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 826898701045..19c1ff542387 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -65,6 +65,19 @@ static void __init xen_hvm_init_mem_mapping(void) { early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE); HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn)); + + /* + * The virtual address of the shared_info page has changed, so + * the vcpu_info pointer for VCPU 0 is now stale. + * + * The prepare_boot_cpu callback will re-initialize it via + * xen_vcpu_setup, but we can't rely on that to be called for + * old Xen versions (xen_have_vector_callback == 0). + * + * It is, in any case, bad to have a stale vcpu_info pointer + * so reset it now. + */ + xen_vcpu_info_reset(0); } static void __init init_hvm_pv_info(void) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d33e7dbe3129..2d76106788a3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) } EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); -static void xen_flush_tlb_all(void) +static noinline void xen_flush_tlb_all(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_all(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 486c0a34d00b..2c30cabfda90 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void) return this_cpu_read(xen_vcpu_info.arch.cr2); } -static void xen_flush_tlb(void) +static noinline void xen_flush_tlb(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); |