From 23a60f834406c8e3805328b630d09d5546b460c1 Mon Sep 17 00:00:00 2001 From: Collin Walling Date: Mon, 22 Jun 2020 11:46:36 -0400 Subject: s390/kvm: diagnose 0x318 sync and reset DIAGNOSE 0x318 (diag318) sets information regarding the environment the VM is running in (Linux, z/VM, etc) and is observed via firmware/service events. This is a privileged s390x instruction that must be intercepted by SIE. Userspace handles the instruction as well as migration. Data is communicated via VCPU register synchronization. The Control Program Name Code (CPNC) is stored in the SIE block. The CPNC along with the Control Program Version Code (CPVC) are stored in the kvm_vcpu_arch struct. This data is reset on load normal and clear resets. Signed-off-by: Collin Walling Reviewed-by: Janosch Frank Acked-by: Cornelia Huck Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20200622154636.5499-3-walling@linux.ibm.com [borntraeger@de.ibm.com: fix sync_reg position] Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4fdf30316582..35cdb4307904 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1031,6 +1031,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_SECURE_GUEST 181 #define KVM_CAP_HALT_POLL 182 #define KVM_CAP_ASYNC_PF_INT 183 +#define KVM_CAP_S390_DIAG318 184 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From e8c22266e68f0db2a7e11b0a9f29fd88ec0cfd4a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 15 Jun 2020 14:13:34 +0200 Subject: KVM: async_pf: change kvm_setup_async_pf()/kvm_arch_setup_async_pf() return type to bool Unlike normal 'int' functions returning '0' on success, kvm_setup_async_pf()/ kvm_arch_setup_async_pf() return '1' when a job to handle page fault asynchronously was scheduled and '0' otherwise. To avoid the confusion change return type to 'bool'. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20200615121334.91300-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 20 +++++++++----------- arch/x86/kvm/mmu/mmu.c | 4 ++-- include/linux/kvm_host.h | 4 ++-- virt/kvm/async_pf.c | 16 ++++++++++------ 4 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d47c19718615..7fd4fdb165fc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3954,33 +3954,31 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) return true; } -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) +static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) { hva_t hva; struct kvm_arch_async_pf arch; - int rc; if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) - return 0; + return false; if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != vcpu->arch.pfault_compare) - return 0; + return false; if (psw_extint_disabled(vcpu)) - return 0; + return false; if (kvm_s390_vcpu_has_irq(vcpu, 0)) - return 0; + return false; if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) - return 0; + return false; if (!vcpu->arch.gmap->pfault_enabled) - return 0; + return false; hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); hva += current->thread.gmap_addr & ~PAGE_MASK; if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) - return 0; + return false; - rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); - return rc; + return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); } static int vcpu_pre_run(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 3ca70554d5f1..a1850120ede0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4045,8 +4045,8 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - gfn_t gfn) +static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + gfn_t gfn) { struct kvm_arch_async_pf arch; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 62ec926c78a0..9edc6fc71a89 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -211,8 +211,8 @@ struct kvm_async_pf { void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - unsigned long hva, struct kvm_arch_async_pf *arch); +bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + unsigned long hva, struct kvm_arch_async_pf *arch); int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 45799606bb3e..390f758d5a27 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -156,17 +156,21 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) } } -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - unsigned long hva, struct kvm_arch_async_pf *arch) +/* + * Try to schedule a job to handle page fault asynchronously. Returns 'true' on + * success, 'false' on failure (page fault has to be handled synchronously). + */ +bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + unsigned long hva, struct kvm_arch_async_pf *arch) { struct kvm_async_pf *work; if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU) - return 0; + return false; /* Arch specific code should not do async PF in this case */ if (unlikely(kvm_is_error_hva(hva))) - return 0; + return false; /* * do alloc nowait since if we are going to sleep anyway we @@ -174,7 +178,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, */ work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN); if (!work) - return 0; + return false; work->wakeup_all = false; work->vcpu = vcpu; @@ -193,7 +197,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, schedule_work(&work->work); - return 1; + return true; } int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 1aa561b1a4c0ae2a9a9b9c21a84b5ca66b4775d8 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 3 Jun 2020 16:56:21 -0700 Subject: kvm: x86: Add "last CPU" to some KVM_EXIT information More often than not, a failed VM-entry in an x86 production environment is induced by a defective CPU. To help identify the bad hardware, include the id of the last logical CPU to run a vCPU in the information provided to userspace on a KVM exit for failed VM-entry or for KVM internal errors not associated with emulation. The presence of this additional information is indicated by a new capability, KVM_CAP_LAST_CPU. Signed-off-by: Jim Mattson Reviewed-by: Oliver Upton Reviewed-by: Peter Shier Message-Id: <20200603235623.245638-5-jmattson@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 1 + arch/x86/kvm/svm/svm.c | 4 +++- arch/x86/kvm/vmx/vmx.c | 10 ++++++++-- arch/x86/kvm/x86.c | 1 + include/uapi/linux/kvm.h | 2 ++ 5 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 426f94582b7a..1cfe79b932d6 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4794,6 +4794,7 @@ hardware_exit_reason. /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; + __u32 cpu; /* if KVM_LAST_CPU */ } fail_entry; If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 24b7f321874f..8ecd46f2cb1e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2947,6 +2947,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; + kvm_run->fail_entry.cpu = svm->last_cpu; dump_vmcb(vcpu); return 0; } @@ -2970,8 +2971,9 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; + vcpu->run->internal.ndata = 2; vcpu->run->internal.data[0] = exit_code; + vcpu->run->internal.data[1] = svm->last_cpu; return 0; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4d8f12c0a5c6..b52bcebfa094 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4781,10 +4781,11 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; - vcpu->run->internal.ndata = 3; + vcpu->run->internal.ndata = 4; vcpu->run->internal.data[0] = vect_info; vcpu->run->internal.data[1] = intr_info; vcpu->run->internal.data[2] = error_code; + vcpu->run->internal.data[3] = vmx->last_cpu; return 0; } @@ -6006,6 +6007,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; vcpu->run->fail_entry.hardware_entry_failure_reason = exit_reason; + vcpu->run->fail_entry.cpu = vmx->last_cpu; return 0; } @@ -6014,6 +6016,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; vcpu->run->fail_entry.hardware_entry_failure_reason = vmcs_read32(VM_INSTRUCTION_ERROR); + vcpu->run->fail_entry.cpu = vmx->last_cpu; return 0; } @@ -6040,6 +6043,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) vcpu->run->internal.data[3] = vmcs_read64(GUEST_PHYSICAL_ADDRESS); } + vcpu->run->internal.data[vcpu->run->internal.ndata++] = + vmx->last_cpu; return 0; } @@ -6095,8 +6100,9 @@ unexpected_vmexit: vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; + vcpu->run->internal.ndata = 2; vcpu->run->internal.data[0] = exit_reason; + vcpu->run->internal.data[1] = vmx->last_cpu; return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 82f457f0e7e0..1a0fad1018f9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3510,6 +3510,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_LAST_CPU: r = 1; break; case KVM_CAP_SYNC_REGS: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4fdf30316582..ff9b335620d0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -289,6 +289,7 @@ struct kvm_run { /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; + __u32 cpu; } fail_entry; /* KVM_EXIT_EXCEPTION */ struct { @@ -1031,6 +1032,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_SECURE_GUEST 181 #define KVM_CAP_HALT_POLL 182 #define KVM_CAP_ASYNC_PF_INT 183 +#define KVM_CAP_LAST_CPU 184 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 995decb6c43e1d6e9d6a7d590471f2eea74600f4 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 8 Jul 2020 16:00:23 +0200 Subject: KVM: x86: take as_id into account when checking PGD OVMF booted guest running on shadow pages crashes on TRIPLE FAULT after enabling paging from SMM. The crash is triggered from mmu_check_root() and is caused by kvm_is_visible_gfn() searching through memslots with as_id = 0 while vCPU may be in a different context (address space). Introduce kvm_vcpu_is_visible_gfn() and use it from mmu_check_root(). Signed-off-by: Vitaly Kuznetsov Message-Id: <20200708140023.1476020-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0011b2c97f65..231beb6d9cf7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3693,7 +3693,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) { int ret = 0; - if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { + if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) { kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ret = 1; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9edc6fc71a89..87140e79648b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -774,6 +774,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); +bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0a68c9d3d3ab..b528a59b0a84 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1626,6 +1626,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); +bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + struct kvm_memory_slot *memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + + return kvm_is_visible_memslot(memslot); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_is_visible_gfn); + unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) { struct vm_area_struct *vma; -- cgit v1.2.3 From 2aa9c199cf8151c190c7e7ca3ddfcfbb2d85ac36 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 19:35:38 -0700 Subject: KVM: Move x86's version of struct kvm_mmu_memory_cache to common code Move x86's 'struct kvm_mmu_memory_cache' to common code in anticipation of moving the entire x86 implementation code to common KVM and reusing it for arm64 and MIPS. Add a new architecture specific asm/kvm_types.h to control the existence and parameters of the struct. The new header is needed to avoid a chicken-and-egg problem with asm/kvm_host.h as all architectures define instances of the struct in their vCPU structs. Add an asm-generic version of kvm_types.h to avoid having empty files on PPC and s390 in the long term, and for arm64 and mips in the short term. Suggested-by: Christoffer Dall Reviewed-by: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20200703023545.8771-15-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/x86/include/asm/kvm_host.h | 13 ------------- arch/x86/include/asm/kvm_types.h | 7 +++++++ include/asm-generic/kvm_types.h | 5 +++++ include/linux/kvm_types.h | 19 +++++++++++++++++++ 8 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 arch/x86/include/asm/kvm_types.h create mode 100644 include/asm-generic/kvm_types.h (limited to 'include') diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index ff9cbb631212..35a68155cd0e 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += early_ioremap.h +generic-y += kvm_types.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qrwlock.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 8643d313890e..397e6d24d2ab 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -5,6 +5,7 @@ generated-y += syscall_table_64_n32.h generated-y += syscall_table_64_n64.h generated-y += syscall_table_64_o32.h generic-y += export.h +generic-y += kvm_types.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index dadbcf3a0b1e..2d444d09b553 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -4,6 +4,7 @@ generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h generic-y += export.h +generic-y += kvm_types.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += vtime.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 83f6e85de7bc..319efa0e6d02 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -6,5 +6,6 @@ generated-y += unistd_nr.h generic-y += asm-offsets.h generic-y += export.h +generic-y += kvm_types.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9d41eb5a8453..5aaef036627f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -193,8 +193,6 @@ struct x86_exception; enum x86_intercept; enum x86_intercept_stage; -#define KVM_NR_MEM_OBJS 40 - #define KVM_NR_DB_REGS 4 #define DR6_BD (1 << 13) @@ -245,17 +243,6 @@ enum x86_intercept_stage; struct kvm_kernel_irq_routing_entry; -/* - * We don't want allocation failures within the mmu code, so we preallocate - * enough memory for a single page fault in a cache. - */ -struct kvm_mmu_memory_cache { - int nobjs; - gfp_t gfp_zero; - struct kmem_cache *kmem_cache; - void *objects[KVM_NR_MEM_OBJS]; -}; - /* * the pages used as guest page table on soft mmu are tracked by * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h new file mode 100644 index 000000000000..08f1b57d3b62 --- /dev/null +++ b/arch/x86/include/asm/kvm_types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_TYPES_H +#define _ASM_X86_KVM_TYPES_H + +#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 + +#endif /* _ASM_X86_KVM_TYPES_H */ diff --git a/include/asm-generic/kvm_types.h b/include/asm-generic/kvm_types.h new file mode 100644 index 000000000000..2a82daf110f1 --- /dev/null +++ b/include/asm-generic/kvm_types.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_KVM_TYPES_H +#define _ASM_GENERIC_KVM_TYPES_H + +#endif diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 68e84cf42a3f..a7580f69dda0 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -20,6 +20,8 @@ enum kvm_mr_change; #include +#include + /* * Address types: * @@ -58,4 +60,21 @@ struct gfn_to_pfn_cache { bool dirty; }; +#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE +/* + * Memory caches are used to preallocate memory ahead of various MMU flows, + * e.g. page fault handlers. Gracefully handling allocation failures deep in + * MMU flows is problematic, as is triggering reclaim, I/O, etc... while + * holding MMU locks. Note, these caches act more like prefetch buffers than + * classical caches, i.e. objects are not returned to the cache on being freed. + */ +struct kvm_mmu_memory_cache { + int nobjs; + gfp_t gfp_zero; + struct kmem_cache *kmem_cache; + void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE]; +}; +#endif + + #endif /* __KVM_TYPES_H__ */ -- cgit v1.2.3 From 6926f95accee3f8ceb5f69dbecd880687028ae70 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 19:35:39 -0700 Subject: KVM: Move x86's MMU memory cache helpers to common KVM code Move x86's memory cache helpers to common KVM code so that they can be reused by arm64 and MIPS in future patches. Suggested-by: Christoffer Dall Reviewed-by: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20200703023545.8771-16-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 53 ---------------------------------------------- include/linux/kvm_host.h | 7 ++++++ virt/kvm/kvm_main.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 942b6a90cb17..fa506aaaf019 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1061,47 +1061,6 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) local_irq_enable(); } -static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc, - gfp_t gfp_flags) -{ - gfp_flags |= mc->gfp_zero; - - if (mc->kmem_cache) - return kmem_cache_alloc(mc->kmem_cache, gfp_flags); - else - return (void *)__get_free_page(gfp_flags); -} - -static int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min) -{ - void *obj; - - if (mc->nobjs >= min) - return 0; - while (mc->nobjs < ARRAY_SIZE(mc->objects)) { - obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT); - if (!obj) - return mc->nobjs >= min ? 0 : -ENOMEM; - mc->objects[mc->nobjs++] = obj; - } - return 0; -} - -static int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc) -{ - return mc->nobjs; -} - -static void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) -{ - while (mc->nobjs) { - if (mc->kmem_cache) - kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]); - else - free_page((unsigned long)mc->objects[--mc->nobjs]); - } -} - static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect) { int r; @@ -1133,18 +1092,6 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); } -static void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) -{ - void *p; - - if (WARN_ON(!mc->nobjs)) - p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT); - else - p = mc->objects[--mc->nobjs]; - BUG_ON(!p); - return p; -} - static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) { return kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 87140e79648b..989afcbe642f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -817,6 +817,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); +#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE +int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); +int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc); +void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); +void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); +#endif + bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, struct kvm_vcpu *except, unsigned long *vcpu_bitmap, cpumask_var_t tmp); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b528a59b0a84..2c2c0254c2d8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -340,6 +340,61 @@ void kvm_reload_remote_mmus(struct kvm *kvm) kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); } +#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE +static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc, + gfp_t gfp_flags) +{ + gfp_flags |= mc->gfp_zero; + + if (mc->kmem_cache) + return kmem_cache_alloc(mc->kmem_cache, gfp_flags); + else + return (void *)__get_free_page(gfp_flags); +} + +int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min) +{ + void *obj; + + if (mc->nobjs >= min) + return 0; + while (mc->nobjs < ARRAY_SIZE(mc->objects)) { + obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT); + if (!obj) + return mc->nobjs >= min ? 0 : -ENOMEM; + mc->objects[mc->nobjs++] = obj; + } + return 0; +} + +int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc) +{ + return mc->nobjs; +} + +void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) { + if (mc->kmem_cache) + kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]); + else + free_page((unsigned long)mc->objects[--mc->nobjs]); + } +} + +void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) +{ + void *p; + + if (WARN_ON(!mc->nobjs)) + p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT); + else + p = mc->objects[--mc->nobjs]; + BUG_ON(!p); + return p; +} +#endif + static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { mutex_init(&vcpu->mutex); -- cgit v1.2.3 From 3edd68399dc155b80335244c8c2673eaa652931a Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Fri, 10 Jul 2020 17:48:11 +0200 Subject: KVM: x86: Add a capability for GUEST_MAXPHYADDR < HOST_MAXPHYADDR support This patch adds a new capability KVM_CAP_SMALLER_MAXPHYADDR which allows userspace to query if the underlying architecture would support GUEST_MAXPHYADDR < HOST_MAXPHYADDR and hence act accordingly (e.g. qemu can decide if it should warn for -cpu ..,phys-bits=X) The complications in this patch are due to unexpected (but documented) behaviour we see with NPF vmexit handling in AMD processor. If SVM is modified to add guest physical address checks in the NPF and guest #PF paths, we see the followning error multiple times in the 'access' test in kvm-unit-tests: test pte.p pte.36 pde.p: FAIL: pte 2000021 expected 2000001 Dump mapping: address: 0x123400000000 ------L4: 24c3027 ------L3: 24c4027 ------L2: 24c5021 ------L1: 1002000021 This is because the PTE's accessed bit is set by the CPU hardware before the NPF vmexit. This is handled completely by hardware and cannot be fixed in software. Therefore, availability of the new capability depends on a boolean variable allow_smaller_maxphyaddr which is set individually by VMX and SVM init routines. On VMX it's always set to true, on SVM it's only set to true when NPT is not enabled. CC: Tom Lendacky CC: Babu Moger Signed-off-by: Mohammed Gamal Message-Id: <20200710154811.418214-10-mgamal@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm/svm.c | 15 +++++++++++++++ arch/x86/kvm/vmx/vmx.c | 7 +++++++ arch/x86/kvm/x86.c | 6 ++++++ include/uapi/linux/kvm.h | 2 ++ 5 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1df95f10c903..1bab87a444d7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1263,7 +1263,7 @@ struct kvm_arch_async_pf { }; extern u64 __read_mostly host_efer; - +extern bool __read_mostly allow_smaller_maxphyaddr; extern struct kvm_x86_ops kvm_x86_ops; #define __KVM_HAVE_ARCH_VM_ALLOC diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2371b1e40f39..783330d0e7b8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -924,6 +924,21 @@ static __init int svm_hardware_setup(void) svm_set_cpu_caps(); + /* + * It seems that on AMD processors PTE's accessed bit is + * being set by the CPU hardware before the NPF vmexit. + * This is not expected behaviour and our tests fail because + * of it. + * A workaround here is to disable support for + * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled. + * In this case userspace can know if there is support using + * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle + * it + * If future AMD CPU models change the behaviour described above, + * this variable can be changed accordingly + */ + allow_smaller_maxphyaddr = !npt_enabled; + return 0; err: diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 962a78c7dde5..1bb59ae5016d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8309,6 +8309,13 @@ static int __init vmx_init(void) #endif vmx_check_vmcs12_offsets(); + /* + * Intel processors don't have problems with + * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable + * it for VMX by default + */ + allow_smaller_maxphyaddr = true; + return 0; } module_init(vmx_init); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 35abe69aad28..95ef62922869 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -187,6 +187,9 @@ static struct kvm_shared_msrs __percpu *shared_msrs; u64 __read_mostly host_efer; EXPORT_SYMBOL_GPL(host_efer); +bool __read_mostly allow_smaller_maxphyaddr; +EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); + static u64 __read_mostly host_xss; u64 __read_mostly supported_xss; EXPORT_SYMBOL_GPL(supported_xss); @@ -3574,6 +3577,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: r = kvm_x86_ops.nested_ops->enable_evmcs != NULL; break; + case KVM_CAP_SMALLER_MAXPHYADDR: + r = (int) allow_smaller_maxphyaddr; + break; default: break; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ff9b335620d0..2c73dcfb3dbb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1033,6 +1033,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HALT_POLL 182 #define KVM_CAP_ASYNC_PF_INT 183 #define KVM_CAP_LAST_CPU 184 +#define KVM_CAP_SMALLER_MAXPHYADDR 185 + #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3