From ef9ee4ad38445a30909c48998624861716f2a994 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:06:29 +0200 Subject: perf/x86: Fix possible Spectre-v1 indexing for hw_perf_event cache_* > arch/x86/events/core.c:319 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_event_ids[cache_type]' (local cap) > arch/x86/events/core.c:319 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_event_ids' (local cap) > arch/x86/events/core.c:328 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_extra_regs[cache_type]' (local cap) > arch/x86/events/core.c:328 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_extra_regs' (local cap) Userspace controls @config which contains 3 (byte) fields used for a 3 dimensional array deref. Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a6006e7bb729..b1be0ac51ce0 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -304,17 +304,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) config = attr->config; - cache_type = (config >> 0) & 0xff; + cache_type = (config >> 0) & 0xff; if (cache_type >= PERF_COUNT_HW_CACHE_MAX) return -EINVAL; + cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); cache_op = (config >> 8) & 0xff; if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) return -EINVAL; + cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); cache_result = (config >> 16) & 0xff; if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); val = 
hw_cache_event_ids[cache_type][cache_op][cache_result]; -- cgit v1.2.3 From 46b1b577229a091b137831becaa0fae8690ee15a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:08:58 +0200 Subject: perf/x86: Fix possible Spectre-v1 indexing for x86_pmu::event_map() > arch/x86/events/intel/cstate.c:307 cstate_pmu_event_init() warn: potential spectre issue 'pkg_msr' (local cap) > arch/x86/events/intel/core.c:337 intel_pmu_event_map() warn: potential spectre issue 'intel_perfmon_event_map' > arch/x86/events/intel/knc.c:122 knc_pmu_event_map() warn: potential spectre issue 'knc_perfmon_event_map' > arch/x86/events/intel/p4.c:722 p4_pmu_event_map() warn: potential spectre issue 'p4_general_events' > arch/x86/events/intel/p6.c:116 p6_pmu_event_map() warn: potential spectre issue 'p6_perfmon_event_map' > arch/x86/events/amd/core.c:132 amd_pmu_event_map() warn: potential spectre issue 'amd_perfmon_event_map' Userspace controls @attr, sanitize @attr->config before passing it on to x86_pmu::event_map(). 
Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index b1be0ac51ce0..45b2b1c93d04 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -424,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event) if (attr->config >= x86_pmu.max_events) return -EINVAL; + attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); + /* * The generic map: */ -- cgit v1.2.3 From 06ce6e9b6d6c09d4129c6e24a1314a395d816c10 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:23:36 +0200 Subject: perf/x86/msr: Fix possible Spectre-v1 indexing in the MSR driver > arch/x86/events/msr.c:178 msr_event_init() warn: potential spectre issue 'msr' (local cap) Userspace controls @attr, sanitize cfg (attr->config) before using it to index an array. 
Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index e7edf19e64c2..b4771a6ddbc1 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include enum perf_msr_id { @@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (cfg >= PERF_MSR_EVENT_MAX) - return -EINVAL; - /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || @@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (cfg >= PERF_MSR_EVENT_MAX) + return -EINVAL; + + cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); + if (!msr[cfg].attr) return -EINVAL; -- cgit v1.2.3 From a5f81290ce475489fa2551c01a07470c1a4c932e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:25:48 +0200 Subject: perf/x86/cstate: Fix possible Spectre-v1 indexing for pkg_msr > arch/x86/events/intel/cstate.c:307 cstate_pmu_event_init() warn: potential spectre issue 'pkg_msr' (local cap) Userspace controls @attr, sanitize cfg (attr->config) before using it to index an array. 
Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9aca448bb8e6..9f8084f18d58 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include "../perf_event.h" @@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event) } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; -- cgit v1.2.3 From e0f6d1a526b6adfa9ca3b336b83ece0eed345033 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 4 May 2018 19:59:35 +0200 Subject: x86/vdso: Remove unused file commit da861e18eccc ("x86, vdso: Get rid of the fake section mechanism") left this file behind; nothing is using it anymore. 
Signed-off-by: Jann Horn Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Link: http://lkml.kernel.org/r/20180504175935.104085-1-jannh@google.com Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vdso32/vdso-fakesections.c | 1 - 1 file changed, 1 deletion(-) delete mode 100644 arch/x86/entry/vdso/vdso32/vdso-fakesections.c (limited to 'arch/x86') diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c deleted file mode 100644 index 541468e25265..000000000000 --- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso-fakesections.c" -- cgit v1.2.3 From d1ecfa9d1f402366b1776fbf84e635678a51414f Mon Sep 17 00:00:00 2001 From: "van der Linden, Frank" Date: Fri, 4 May 2018 16:11:00 -0400 Subject: x86/xen: Reset VCPU0 info pointer after shared_info remap This patch fixes crashes during boot for HVM guests on older (pre HVM vector callback) Xen versions. Without this, current kernels will always fail to boot on those Xen versions. 
Sample stack trace: BUG: unable to handle kernel paging request at ffffffffff200000 IP: __xen_evtchn_do_upcall+0x1e/0x80 PGD 1e0e067 P4D 1e0e067 PUD 1e10067 PMD 235c067 PTE 0 Oops: 0002 [#1] SMP PTI Modules linked in: CPU: 0 PID: 512 Comm: kworker/u2:0 Not tainted 4.14.33-52.13.amzn1.x86_64 #1 Hardware name: Xen HVM domU, BIOS 3.4.3.amazon 11/11/2016 task: ffff88002531d700 task.stack: ffffc90000480000 RIP: 0010:__xen_evtchn_do_upcall+0x1e/0x80 RSP: 0000:ffff880025403ef0 EFLAGS: 00010046 RAX: ffffffff813cc760 RBX: ffffffffff200000 RCX: ffffc90000483ef0 RDX: ffff880020540a00 RSI: ffff880023c78000 RDI: 000000000000001c RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff880025403f5c R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff880025400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffff200000 CR3: 0000000001e0a000 CR4: 00000000000006f0 Call Trace: do_hvm_evtchn_intr+0xa/0x10 __handle_irq_event_percpu+0x43/0x1a0 handle_irq_event_percpu+0x20/0x50 handle_irq_event+0x39/0x60 handle_fasteoi_irq+0x80/0x140 handle_irq+0xaf/0x120 do_IRQ+0x41/0xd0 common_interrupt+0x7d/0x7d During boot, the HYPERVISOR_shared_info page gets remapped to make it work with KASLR. This means that any pointer derived from it needs to be adjusted. The only value that this applies to is the vcpu_info pointer for VCPU 0. For PV and HVM with the callback vector feature, this gets done via the smp_ops prepare_boot_cpu callback. Older Xen versions do not support the HVM callback vector, so there is no Xen-specific smp_ops set up in that scenario. So, the vcpu_info pointer for VCPU 0 never gets set to the proper value, and the first reference of it will be bad. Fix this by resetting it immediately after the remap. 
Signed-off-by: Frank van der Linden Reviewed-by: Eduardo Valentin Reviewed-by: Alakesh Haloi Reviewed-by: Vallish Vaidyeshwara Reviewed-by: Boris Ostrovsky Cc: Juergen Gross Cc: Boris Ostrovsky Cc: xen-devel@lists.xenproject.org Signed-off-by: Boris Ostrovsky --- arch/x86/xen/enlighten_hvm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 826898701045..19c1ff542387 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -65,6 +65,19 @@ static void __init xen_hvm_init_mem_mapping(void) { early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE); HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn)); + + /* + * The virtual address of the shared_info page has changed, so + * the vcpu_info pointer for VCPU 0 is now stale. + * + * The prepare_boot_cpu callback will re-initialize it via + * xen_vcpu_setup, but we can't rely on that to be called for + * old Xen versions (xen_have_vector_callback == 0). + * + * It is, in any case, bad to have a stale vcpu_info pointer + * so reset it now. + */ + xen_vcpu_info_reset(0); } static void __init init_hvm_pv_info(void) -- cgit v1.2.3 From f9bc6b2dd9cf025f827f471769e1d88b527bfb91 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 4 May 2018 13:01:32 -0700 Subject: x86/amd_nb: Add support for Raven Ridge CPUs Add Raven Ridge root bridge and data fabric PCI IDs. This is required for amd_pci_dev_to_node_id() and amd_smn_read(). 
Cc: stable@vger.kernel.org # v4.16+ Tested-by: Gabriel Craciunescu Acked-by: Thomas Gleixner Signed-off-by: Guenter Roeck --- arch/x86/kernel/amd_nb.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c88e0b127810..b481b95bd8f6 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -14,8 +14,11 @@ #include #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -24,6 +27,7 @@ static u32 *flush_words; static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, {} }; @@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, {} }; @@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; -- cgit v1.2.3 From 45dd9b0666a162f8e4be76096716670cf1741f0e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 9 May 2018 
14:36:09 -0400 Subject: tracing/x86/xen: Remove zero data size trace events trace_xen_mmu_flush_tlb{_all} Doing an audit of trace events, I discovered two trace events in the xen subsystem that use a hack to create zero data size trace events. This is not what trace events are for. Trace events add memory footprint overhead, and if all you need to do is see if a function is hit or not, simply make that function noinline and use function tracer filtering. Worse yet, the hack used was: __array(char, x, 0) Which creates a static string of zero length. There are assumptions about such constructs in ftrace that this is a dynamic string that is nul-terminated. This is not the case with these tracepoints and can cause problems in various parts of ftrace. Nuke the trace events! Link: http://lkml.kernel.org/r/20180509144605.5a220327@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 95a7d76897c1e ("xen/mmu: Use Xen specific TLB flush instead of the generic one.") Reviewed-by: Juergen Gross Signed-off-by: Steven Rostedt (VMware) --- arch/x86/xen/mmu.c | 4 +--- arch/x86/xen/mmu_pv.c | 4 +--- include/trace/events/xen.h | 16 ---------------- 3 files changed, 2 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d33e7dbe3129..2d76106788a3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) } EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); -static void xen_flush_tlb_all(void) +static noinline void xen_flush_tlb_all(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_all(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 486c0a34d00b..2c30cabfda90 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void) return this_cpu_read(xen_vcpu_info.arch.cr2); } -static void
xen_flush_tlb(void) +static noinline void xen_flush_tlb(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 7dd8f34c37df..fdcf88bcf0ea 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -352,22 +352,6 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd, DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin); DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin); -TRACE_EVENT(xen_mmu_flush_tlb_all, - TP_PROTO(int x), - TP_ARGS(x), - TP_STRUCT__entry(__array(char, x, 0)), - TP_fast_assign((void)x), - TP_printk("%s", "") - ); - -TRACE_EVENT(xen_mmu_flush_tlb, - TP_PROTO(int x), - TP_ARGS(x), - TP_STRUCT__entry(__array(char, x, 0)), - TP_fast_assign((void)x), - TP_printk("%s", "") - ); - TRACE_EVENT(xen_mmu_flush_tlb_one_user, TP_PROTO(unsigned long addr), TP_ARGS(addr), -- cgit v1.2.3 From 4c27625b7a67eb9006963ed2bcf8e53b259b43af Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sat, 5 May 2018 04:02:32 -0700 Subject: KVM: X86: Lower the default timer frequency limit to 200us MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anthoine reported: The period used by Windows change over time but it can be 1 milliseconds or less. I saw the limit_periodic_timer_frequency print so 500 microseconds is sometimes reached. As suggested by Paolo, lower the default timer frequency limit to a smaller interval of 200 us (5000 Hz) to leave some headroom. This is required due to Windows 10 changing the scheduler tick limit from 1024 Hz to 2048 Hz. 
Reported-by: Anthoine Bourgeois Suggested-by: Paolo Bonzini Reviewed-by: Darren Kenny Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Anthoine Bourgeois Cc: Darren Kenny Cc: Jan Kiszka Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 37dd9a9d050a..59371de5d722 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); static bool __read_mostly report_ignored_msrs = true; module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); -unsigned int min_timer_period_us = 500; +unsigned int min_timer_period_us = 200; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; -- cgit v1.2.3 From 633711e82878dc29083fc5d2605166755e25b57a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 17 May 2018 17:54:24 +0300 Subject: kvm: rename KVM_HINTS_DEDICATED to KVM_HINTS_REALTIME KVM_HINTS_DEDICATED seems to be somewhat confusing: Guest doesn't really care whether it's the only task running on a host CPU as long as it's not preempted. And there are more reasons for Guest to be preempted than host CPU sharing, for example, with memory overcommit it can get preempted on a memory access, post copy migration can cause preemption, etc. Let's call it KVM_HINTS_REALTIME which seems to better match what guests expect. Also, the flag must be set on all vCPUs - current guests assume this. Note so in the documentation. Signed-off-by: Michael S.
Tsirkin Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/cpuid.txt | 6 +++--- arch/x86/include/uapi/asm/kvm_para.h | 2 +- arch/x86/kernel/kvm.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index d4f33eb805dd..ab022dcd0911 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side flag || value || meaning ================================================================================== -KVM_HINTS_DEDICATED || 0 || guest checks this feature bit to - || || determine if there is vCPU pinning - || || and there is no vCPU over-commitment, +KVM_HINTS_REALTIME || 0 || guest checks this feature bit to + || || determine that vCPUs are never + || || preempted for an unlimited time, || || allowing optimizations ---------------------------------------------------------------------------------- diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 4c851ebb3ceb..0ede697c3961 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,7 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 -#define KVM_HINTS_DEDICATED 0 +#define KVM_HINTS_REALTIME 0 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. 
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7867417cfaff..5b2300b818af 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void) static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) { native_smp_prepare_cpus(max_cpus); - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) static_branch_disable(&virt_spin_lock_key); } @@ -553,7 +553,7 @@ static void __init kvm_guest_init(void) } if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; @@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void) int cpu; if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), @@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) return; __pv_init_lock_hash(); -- cgit v1.2.3 From 6b48cb5f8347bc0153ff1d7b075db92e6723ffdb Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:30 -0700 Subject: X86/Hyper-V: Enlighten APIC access Hyper-V supports MSR based APIC access; implement the enlightenment. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-1-kys@linuxonhyperv.com --- arch/x86/hyperv/Makefile | 2 +- arch/x86/hyperv/hv_apic.c | 104 ++++++++++++++++++++++++++++++++++++++++ arch/x86/hyperv/hv_init.c | 5 +- arch/x86/include/asm/mshyperv.h | 4 +- 4 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 arch/x86/hyperv/hv_apic.c (limited to 'arch/x86') diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 367a8203cfcf..00ce4df01a09 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1 +1 @@ -obj-y := hv_init.o mmu.o +obj-y := hv_init.o mmu.o hv_apic.o diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c new file mode 100644 index 000000000000..ca20e31d311c --- /dev/null +++ b/arch/x86/hyperv/hv_apic.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific APIC code. + * + * Copyright (C) 2018, Microsoft, Inc. + * + * Author : K. Y. Srinivasan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +#if IS_ENABLED(CONFIG_HYPERV) + +static u64 hv_apic_icr_read(void) +{ + u64 reg_val; + + rdmsrl(HV_X64_MSR_ICR, reg_val); + return reg_val; +} + +static void hv_apic_icr_write(u32 low, u32 id) +{ + u64 reg_val; + + reg_val = SET_APIC_DEST_FIELD(id); + reg_val = reg_val << 32; + reg_val |= low; + + wrmsrl(HV_X64_MSR_ICR, reg_val); +} + +static u32 hv_apic_read(u32 reg) +{ + u32 reg_val, hi; + + switch (reg) { + case APIC_EOI: + rdmsr(HV_X64_MSR_EOI, reg_val, hi); + return reg_val; + case APIC_TASKPRI: + rdmsr(HV_X64_MSR_TPR, reg_val, hi); + return reg_val; + + default: + return native_apic_mem_read(reg); + } +} + +static void hv_apic_write(u32 reg, u32 val) +{ + switch (reg) { + case APIC_EOI: + wrmsr(HV_X64_MSR_EOI, val, 0); + break; + case APIC_TASKPRI: + wrmsr(HV_X64_MSR_TPR, val, 0); + break; + default: + native_apic_mem_write(reg, val); + } +} + +static void hv_apic_eoi_write(u32 reg, u32 val) +{ + wrmsr(HV_X64_MSR_EOI, val, 0); +} + +void __init hv_apic_init(void) +{ + if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { + pr_info("Hyper-V: Using MSR based APIC access\n"); + apic_set_eoi_write(hv_apic_eoi_write); + apic->read = hv_apic_read; + apic->write = hv_apic_write; + apic->icr_write = hv_apic_icr_write; + apic->icr_read = hv_apic_icr_read; + } +} + +#endif /* CONFIG_HYPERV */ +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index cfecc2272f2d..71e50fc2b7ef 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -242,8 +242,9 @@ static int hv_cpu_die(unsigned int cpu) * * 1. Setup the hypercall page. * 2. Register Hyper-V specific clocksource. + * 3. Setup Hyper-V specific APIC entry points. 
*/ -void hyperv_init(void) +void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; @@ -298,6 +299,8 @@ void hyperv_init(void) hyper_alloc_mmu(); + hv_apic_init(); + /* * Register Hyper-V specific clocksource. */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index b90e79610cf7..162977b82e2e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -258,7 +258,7 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } -void hyperv_init(void); +void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); @@ -269,6 +269,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); +void hv_apic_init(void); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } @@ -277,6 +278,7 @@ static inline void hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; +static inline void hv_apic_init(void) {} static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) { return NULL; -- cgit v1.2.3 From 68bb7bfb7985df2bd15c2dc975cb68b7a901488a Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:31 -0700 Subject: X86/Hyper-V: Enable IPI enlightenments Hyper-V supports hypercalls to implement IPI; use them. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-2-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_apic.c | 117 +++++++++++++++++++++++++++++++++++++ arch/x86/hyperv/hv_init.c | 27 +++++++++ arch/x86/include/asm/hyperv-tlfs.h | 15 +++++ arch/x86/include/asm/mshyperv.h | 1 + 4 files changed, 160 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index ca20e31d311c..3e0de61f1a7c 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -33,6 +33,8 @@ #ifdef CONFIG_X86_64 #if IS_ENABLED(CONFIG_HYPERV) +static struct apic orig_apic; + static u64 hv_apic_icr_read(void) { u64 reg_val; @@ -88,8 +90,123 @@ static void hv_apic_eoi_write(u32 reg, u32 val) wrmsr(HV_X64_MSR_EOI, val, 0); } +/* + * IPI implementation on Hyper-V. + */ +static bool __send_ipi_mask(const struct cpumask *mask, int vector) +{ + int cur_cpu, vcpu; + struct ipi_arg_non_ex **arg; + struct ipi_arg_non_ex *ipi_arg; + int ret = 1; + unsigned long flags; + + if (cpumask_empty(mask)) + return true; + + if (!hv_hypercall_pg) + return false; + + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + return false; + + local_irq_save(flags); + arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->cpu_mask = 0; + + for_each_cpu(cur_cpu, mask) { + vcpu = hv_cpu_number_to_vp_number(cur_cpu); + /* + * This particular version of the IPI hypercall can + * only target upto 64 CPUs. 
+ */ + if (vcpu >= 64) + goto ipi_mask_done; + + __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask); + } + + ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL); + +ipi_mask_done: + local_irq_restore(flags); + return ((ret == 0) ? true : false); +} + +static bool __send_ipi_one(int cpu, int vector) +{ + struct cpumask mask = CPU_MASK_NONE; + + cpumask_set_cpu(cpu, &mask); + return __send_ipi_mask(&mask, vector); +} + +static void hv_send_ipi(int cpu, int vector) +{ + if (!__send_ipi_one(cpu, vector)) + orig_apic.send_IPI(cpu, vector); +} + +static void hv_send_ipi_mask(const struct cpumask *mask, int vector) +{ + if (!__send_ipi_mask(mask, vector)) + orig_apic.send_IPI_mask(mask, vector); +} + +static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned int this_cpu = smp_processor_id(); + struct cpumask new_mask; + const struct cpumask *local_mask; + + cpumask_copy(&new_mask, mask); + cpumask_clear_cpu(this_cpu, &new_mask); + local_mask = &new_mask; + if (!__send_ipi_mask(local_mask, vector)) + orig_apic.send_IPI_mask_allbutself(mask, vector); +} + +static void hv_send_ipi_allbutself(int vector) +{ + hv_send_ipi_mask_allbutself(cpu_online_mask, vector); +} + +static void hv_send_ipi_all(int vector) +{ + if (!__send_ipi_mask(cpu_online_mask, vector)) + orig_apic.send_IPI_all(vector); +} + +static void hv_send_ipi_self(int vector) +{ + if (!__send_ipi_one(smp_processor_id(), vector)) + orig_apic.send_IPI_self(vector); +} + void __init hv_apic_init(void) { + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { + pr_info("Hyper-V: Using IPI hypercalls\n"); + /* + * Set the IPI entry points. 
+ */ + orig_apic = *apic; + + apic->send_IPI = hv_send_ipi; + apic->send_IPI_mask = hv_send_ipi_mask; + apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself; + apic->send_IPI_allbutself = hv_send_ipi_allbutself; + apic->send_IPI_all = hv_send_ipi_all; + apic->send_IPI_self = hv_send_ipi_self; + } + if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { pr_info("Hyper-V: Using MSR based APIC access\n"); apic_set_eoi_write(hv_apic_eoi_write); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 71e50fc2b7ef..6bc90d68ac8b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -91,12 +91,19 @@ EXPORT_SYMBOL_GPL(hv_vp_index); struct hv_vp_assist_page **hv_vp_assist_page; EXPORT_SYMBOL_GPL(hv_vp_assist_page); +void __percpu **hyperv_pcpu_input_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); + u32 hv_max_vp_index; static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + void **input_arg; + + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + *input_arg = page_address(alloc_page(GFP_KERNEL)); hv_get_vp_index(msr_vp_index); @@ -217,6 +224,16 @@ static int hv_cpu_die(unsigned int cpu) { struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; + unsigned long flags; + void **input_arg; + void *input_pg = NULL; + + local_irq_save(flags); + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + input_pg = *input_arg; + *input_arg = NULL; + local_irq_restore(flags); + free_page((unsigned long)input_pg); if (hv_vp_assist_page && hv_vp_assist_page[cpu]) wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); @@ -260,6 +277,16 @@ void __init hyperv_init(void) if ((ms_hyperv.features & required_msrs) != required_msrs) return; + /* + * Allocate the per-CPU state for the hypercall input arg. + * If this allocation fails, we will not be able to setup + * (per-CPU) hypercall input page and thus this failure is + * fatal on Hyper-V. 
+ */ + hyperv_pcpu_input_arg = alloc_percpu(void *); + + BUG_ON(hyperv_pcpu_input_arg == NULL); + /* Allocate percpu VP index */ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), GFP_KERNEL); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 416cb0e0c496..332e786d4deb 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -164,6 +164,11 @@ */ #define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9) +/* + * Recommend using cluster IPI hypercalls. + */ +#define HV_X64_CLUSTER_IPI_RECOMMENDED (1 << 10) + /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) @@ -329,10 +334,14 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_IPI_LOW_VECTOR 0x10 +#define HV_IPI_HIGH_VECTOR 0xff + /* Declare the various hypercall operations. */ #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_SEND_IPI 0x000b #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 #define HVCALL_POST_MESSAGE 0x005c @@ -706,4 +715,10 @@ struct hv_enlightened_vmcs { #define HV_STIMER_AUTOENABLE (1ULL << 3) #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) +struct ipi_arg_non_ex { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 162977b82e2e..1eff91599c2b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -122,6 +122,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; +extern void __percpu **hyperv_pcpu_input_arg; static inline u64 
hv_do_hypercall(u64 control, void *input, void *output) { -- cgit v1.2.3 From 366f03b0cf90ef55f063d4a54cf62b0ac9b6da9d Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:32 -0700 Subject: X86/Hyper-V: Enhanced IPI enlightenment Support enhanced IPI enlightenments (to target more than 64 CPUs). Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-3-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_apic.c | 42 +++++++++++++++++++++++++++++++++++++- arch/x86/hyperv/mmu.c | 2 +- arch/x86/include/asm/hyperv-tlfs.h | 15 +++++++++++++- arch/x86/include/asm/mshyperv.h | 33 ++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 3e0de61f1a7c..192b6ad6a361 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -93,6 +93,40 @@ static void hv_apic_eoi_write(u32 reg, u32 val) /* * IPI implementation on Hyper-V. 
*/ +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) +{ + struct ipi_arg_ex **arg; + struct ipi_arg_ex *ipi_arg; + unsigned long flags; + int nr_bank = 0; + int ret = 1; + + local_irq_save(flags); + arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_ex_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->vp_set.valid_bank_mask = 0; + + if (!cpumask_equal(mask, cpu_present_mask)) { + ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + } + if (!nr_bank) + ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; + + ret = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, + ipi_arg, NULL); + +ipi_mask_ex_done: + local_irq_restore(flags); + return ((ret == 0) ? true : false); +} + static bool __send_ipi_mask(const struct cpumask *mask, int vector) { int cur_cpu, vcpu; @@ -110,6 +144,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; + if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + return __send_ipi_mask_ex(mask, vector); + local_irq_save(flags); arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); @@ -193,7 +230,10 @@ static void hv_send_ipi_self(int vector) void __init hv_apic_init(void) { if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { - pr_info("Hyper-V: Using IPI hypercalls\n"); + if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + pr_info("Hyper-V: Using ext hypercalls for IPI\n"); + else + pr_info("Hyper-V: Using IPI hypercalls\n"); /* * Set the IPI entry points. 
*/ diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 56c9ebac946f..adee39a7a3f2 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -239,7 +239,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, flush->hv_vp_set.valid_bank_mask = 0; if (!cpumask_equal(cpus, cpu_present_mask)) { - flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K; + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; nr_bank = cpumask_to_vp_set(flush, cpus); } diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 332e786d4deb..3bfa92c2793c 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -344,6 +344,7 @@ struct hv_tsc_emulation_status { #define HVCALL_SEND_IPI 0x000b #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 +#define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d @@ -369,7 +370,7 @@ struct hv_tsc_emulation_status { #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARCE_4K, + HV_GENERIC_SET_SPARSE_4K, HV_GENERIC_SET_ALL, }; @@ -721,4 +722,16 @@ struct ipi_arg_non_ex { u64 cpu_mask; }; +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +struct ipi_arg_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 1eff91599c2b..0ee82519957b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -259,6 +259,39 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } +static inline int cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + + /* valid_bank_mask can represent up to 64 banks */ + if (hv_max_vp_index / 64 >= 64) + return 0; + + /* + * 
Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex + * structs are not cleared between calls, we risk flushing unneeded + * vCPUs otherwise. + */ + for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) + vpset->bank_contents[vcpu_bank] = 0; + + /* + * Some banks may end up being empty but this is acceptable. + */ + for_each_cpu(cpu, cpus) { + vcpu = hv_cpu_number_to_vp_number(cpu); + vcpu_bank = vcpu / 64; + vcpu_offset = vcpu % 64; + __set_bit(vcpu_offset, (unsigned long *) + &vpset->bank_contents[vcpu_bank]); + if (vcpu_bank >= nr_bank) + nr_bank = vcpu_bank + 1; + } + vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); + return nr_bank; +} + void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); void hyper_alloc_mmu(void); -- cgit v1.2.3 From 800b8f03fdc8d66885ff03de531285526a4ca0d4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:33 -0700 Subject: X86/Hyper-V: Consolidate code for converting cpumask to vpset Consolidate code for converting cpumask to vpset. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-4-kys@linuxonhyperv.com --- arch/x86/hyperv/mmu.c | 43 ++----------------------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index adee39a7a3f2..c9cd28f0bae4 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -25,11 +25,7 @@ struct hv_flush_pcpu { struct hv_flush_pcpu_ex { u64 address_space; u64 flags; - struct { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; - } hv_vp_set; + struct hv_vpset hv_vp_set; u64 gva_list[]; }; @@ -70,41 +66,6 @@ static inline int fill_gva_list(u64 gva_list[], int offset, return gva_n - offset; } -/* Return the number of banks in the resulting vp_set */ -static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush, - const struct cpumask *cpus) -{ - int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; - - /* valid_bank_mask can represent up to 64 banks */ - if (hv_max_vp_index / 64 >= 64) - return 0; - - /* - * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex - * structs are not cleared between calls, we risk flushing unneeded - * vCPUs otherwise. - */ - for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) - flush->hv_vp_set.bank_contents[vcpu_bank] = 0; - - /* - * Some banks may end up being empty but this is acceptable. 
- */ - for_each_cpu(cpu, cpus) { - vcpu = hv_cpu_number_to_vp_number(cpu); - vcpu_bank = vcpu / 64; - vcpu_offset = vcpu % 64; - __set_bit(vcpu_offset, (unsigned long *) - &flush->hv_vp_set.bank_contents[vcpu_bank]); - if (vcpu_bank >= nr_bank) - nr_bank = vcpu_bank + 1; - } - flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); - - return nr_bank; -} - static void hyperv_flush_tlb_others(const struct cpumask *cpus, const struct flush_tlb_info *info) { @@ -240,7 +201,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, if (!cpumask_equal(cpus, cpu_present_mask)) { flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vp_set(flush, cpus); + nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); } if (!nr_bank) { -- cgit v1.2.3 From 9a2d78e291a7dea0ae4b4a06ce6bbbe4f1ab7c13 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:34 -0700 Subject: X86/Hyper-V: Consolidate the allocation of the hypercall input page Consolidate the allocation of the hypercall input page. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-5-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_init.c | 2 -- arch/x86/hyperv/mmu.c | 30 ++++++------------------------ arch/x86/include/asm/mshyperv.h | 1 - 3 files changed, 6 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6bc90d68ac8b..4c431e1c1eff 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -324,8 +324,6 @@ void __init hyperv_init(void) hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - hyper_alloc_mmu(); - hv_apic_init(); /* diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index c9cd28f0bae4..5f053d7d1bd9 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -32,9 +32,6 @@ struct hv_flush_pcpu_ex { /* Each gva in gva_list encodes up to 4096 pages to flush */ #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) -static struct hv_flush_pcpu __percpu **pcpu_flush; - -static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex; /* * Fills in gva_list starting from offset. Returns the number of items added. 
@@ -77,7 +74,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, trace_hyperv_mmu_flush_tlb_others(cpus, info); - if (!pcpu_flush || !hv_hypercall_pg) + if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) @@ -85,10 +82,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = this_cpu_ptr(pcpu_flush); - - if (unlikely(!*flush_pcpu)) - *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + flush_pcpu = (struct hv_flush_pcpu **) + this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -164,7 +159,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, trace_hyperv_mmu_flush_tlb_others(cpus, info); - if (!pcpu_flush_ex || !hv_hypercall_pg) + if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) @@ -172,10 +167,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = this_cpu_ptr(pcpu_flush_ex); - - if (unlikely(!*flush_pcpu)) - *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + flush_pcpu = (struct hv_flush_pcpu_ex **) + this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -257,14 +250,3 @@ void hyperv_setup_mmu_ops(void) pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex; } } - -void hyper_alloc_mmu(void) -{ - if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) - return; - - if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) - pcpu_flush = alloc_percpu(struct hv_flush_pcpu *); - else - pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *); -} diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 0ee82519957b..9aaa493f5756 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -294,7 +294,6 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); bool 
hv_is_hyperv_initialized(void); void hyperv_cleanup(void); -- cgit v1.2.3 From 61eeb1f6d1f2648a218855d7c8d44f16df242ef3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 May 2018 16:38:59 +0200 Subject: x86/Hyper-V/hv_apic: Include asm/apic.h Not all configurations magically include asm/apic.h, but the Hyper-V code requires it. Include it explicitly. Fixes: 6b48cb5f8347 ("X86/Hyper-V: Enlighten APIC access") Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner Cc: K. Y. Srinivasan Cc: Michael Kelley --- arch/x86/hyperv/hv_apic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 192b6ad6a361..d3ff6e255924 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -29,6 +29,7 @@ #include #include #include +#include <asm/apic.h> #ifdef CONFIG_X86_64 #if IS_ENABLED(CONFIG_HYPERV) -- cgit v1.2.3 From 2d2ccf24939cf369f7473c7e4ea309891be91848 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 May 2018 21:22:48 +0200 Subject: x86/Hyper-V/hv_apic: Build the Hyper-V APIC conditionally The Hyper-V APIC code is built when CONFIG_HYPERV is enabled but the actual code in that file is guarded with CONFIG_X86_64. There is no point in doing this. Neither is there a point in having the CONFIG_HYPERV guard in there because the containing directory is not built when CONFIG_HYPERV=n. Further for the hv_apic_init() function a stub is provided only for CONFIG_HYPERV=n, which is pointless as the callsite is not compiled at all. But for X86_32 the stub is missing and the build fails. Clean that up: - Compile hv_apic.c only when CONFIG_X86_64=y - Make the stub for hv_apic_init() available when CONFIG_X86_64=n Fixes: 6b48cb5f8347 ("X86/Hyper-V: Enlighten APIC access") Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner Cc: K. Y. 
Srinivasan Cc: Michael Kelley --- arch/x86/hyperv/Makefile | 3 ++- arch/x86/hyperv/hv_apic.c | 6 ------ arch/x86/include/asm/mshyperv.h | 7 ++++++- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 00ce4df01a09..b173d404e3df 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1 +1,2 @@ -obj-y := hv_init.o mmu.o hv_apic.o +obj-y := hv_init.o mmu.o +obj-$(CONFIG_X86_64) += hv_apic.o diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index d3ff6e255924..f68855499391 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -31,9 +31,6 @@ #include #include -#ifdef CONFIG_X86_64 -#if IS_ENABLED(CONFIG_HYPERV) - static struct apic orig_apic; static u64 hv_apic_icr_read(void) @@ -257,6 +254,3 @@ void __init hv_apic_init(void) apic->icr_read = hv_apic_icr_read; } } - -#endif /* CONFIG_HYPERV */ -#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 9aaa493f5756..997192131b7b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -302,7 +302,13 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); + +#ifdef CONFIG_X86_64 void hv_apic_init(void); +#else +static inline void hv_apic_init(void) {} +#endif + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } @@ -311,7 +317,6 @@ static inline void hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; -static inline void hv_apic_init(void) {} static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) { return NULL; -- cgit v1.2.3