From d7c9bfb9caaffd496ae44b258ec7c793677d3eeb Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 23 Aug 2022 14:32:37 +0800 Subject: KVM: x86/mmu: fix memoryleak in kvm_mmu_vendor_module_init() When register_shrinker() fails, KVM doesn't release the percpu counter kvm_total_used_mmu_pages leading to memoryleak. Fix this issue by calling percpu_counter_destroy() when register_shrinker() fails. Fixes: ab271bd4dfd5 ("x86: kvm: propagate register_shrinker return code") Signed-off-by: Miaohe Lin Link: https://lore.kernel.org/r/20220823063237.47299-1-linmiaohe@huawei.com [sean: tweak shortlog and changelog] Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/mmu/mmu.c') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e418ef3ecfcb..d25d55b1f0b5 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6702,10 +6702,12 @@ int kvm_mmu_vendor_module_init(void) ret = register_shrinker(&mmu_shrinker, "x86-mmu"); if (ret) - goto out; + goto out_shrinker; return 0; +out_shrinker: + percpu_counter_destroy(&kvm_total_used_mmu_pages); out: mmu_destroy_caches(); return ret; -- cgit v1.2.3 From 43a063cab325ee7cc50349967e536b3cd4e57f03 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 23 Aug 2022 00:46:37 +0000 Subject: KVM: x86/mmu: count KVM mmu usage in secondary pagetable stats. Count the pages used by KVM mmu on x86 in memory stats under secondary pagetable stats (e.g. "SecPageTables" in /proc/meminfo) to give better visibility into the memory consumption of KVM mmu in a similar way to how normal user page tables are accounted. Add the inner helper in common KVM, ARM will also use it to count stats in a future commit. Signed-off-by: Yosry Ahmed Reviewed-by: Sean Christopherson Acked-by: Marc Zyngier # generic KVM changes Link: https://lore.kernel.org/r/20220823004639.2387269-3-yosryahmed@google.com Link: https://lore.kernel.org/r/20220823004639.2387269-4-yosryahmed@google.com [sean: squash x86 usage to workaround modpost issues] Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 16 ++++++++++++++-- arch/x86/kvm/mmu/tdp_mmu.c | 12 ++++++++++++ include/linux/kvm_host.h | 13 +++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/mmu/mmu.c') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d25d55b1f0b5..32b60a6b83bd 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1665,6 +1665,18 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) percpu_counter_add(&kvm_total_used_mmu_pages, nr); } +static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + kvm_mod_used_mmu_pages(kvm, +1); + kvm_account_pgtable_pages((void *)sp->spt, +1); +} + +static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + kvm_mod_used_mmu_pages(kvm, -1); + kvm_account_pgtable_pages((void *)sp->spt, -1); +} + static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp) { MMU_WARN_ON(!is_empty_shadow_page(sp->spt)); @@ -2122,7 +2134,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm, */ sp->mmu_valid_gen = kvm->arch.mmu_valid_gen; list_add(&sp->link, &kvm->arch.active_mmu_pages); - kvm_mod_used_mmu_pages(kvm, +1); + kvm_account_mmu_page(kvm, sp); sp->gfn = gfn; sp->role = role; @@ -2456,7 +2468,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, list_add(&sp->link, invalid_list); else list_move(&sp->link, invalid_list); - kvm_mod_used_mmu_pages(kvm, -1); + kvm_unaccount_mmu_page(kvm, sp); } else { /* * Remove the active root from the active page list, the root diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index bf2ccf9debca..672f0432d777 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -372,6 +372,16 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, } } +static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + kvm_account_pgtable_pages((void *)sp->spt, +1); +} + +static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + kvm_account_pgtable_pages((void *)sp->spt, -1); +} + /** * tdp_mmu_unlink_sp() - Remove a shadow page from the list of used pages * @@ -384,6 +394,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, bool shared) { + tdp_unaccount_mmu_page(kvm, sp); if (shared) spin_lock(&kvm->arch.tdp_mmu_pages_lock); else @@ -1132,6 +1143,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, if (account_nx) account_huge_nx_page(kvm, sp); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); + tdp_account_mmu_page(kvm, sp); return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f4519d3689e1..04c7e5f2f727 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2247,6 +2247,19 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) } #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +/* + * If more than one page is being (un)accounted, @virt must be the address of + * the first page of a block of pages what were allocated together (i.e + * accounted together). + * + * kvm_account_pgtable_pages() is thread-safe because mod_lruvec_page_state() + * is thread-safe. + */ +static inline void kvm_account_pgtable_pages(void *virt, int nr) +{ + mod_lruvec_page_state(virt_to_page(virt), NR_SECONDARY_PAGETABLE, nr); +} + /* * This defines how many reserved entries we want to keep before we * kick the vcpu to the userspace to avoid dirty ring full. This -- cgit v1.2.3 From faa03b39722a86e4b83e594151d1775875a0c7ca Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Tue, 10 May 2022 16:10:00 +0900 Subject: KVM: Add extra information in kvm_page_fault trace point Currently, kvm_page_fault trace point provide fault_address and error code. However it is not enough to find which cpu and instruction cause kvm_page_faults. So add vcpu id and instruction pointer in kvm_page_fault trace point. Cc: Baik Song An Cc: Hong Yeon Kim Cc: Taeung Song Cc: linuxgeek@linuxgeek.io Signed-off-by: Wonhyuk Yang Link: https://lore.kernel.org/r/20220510071001.87169-1-vvghjk1234@gmail.com Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/trace.h | 12 +++++++++--- arch/x86/kvm/vmx/vmx.c | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/x86/kvm/mmu/mmu.c') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 32b60a6b83bd..40feb5ec761e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4302,7 +4302,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, vcpu->arch.l1tf_flush_l1d = true; if (!flags) { - trace_kvm_page_fault(fault_address, error_code); + trace_kvm_page_fault(vcpu, fault_address, error_code); if (kvm_event_needs_reinjection(vcpu)) kvm_mmu_unprotect_page_virt(vcpu, fault_address); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 26a348389ece..ec2b42a5abe3 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1975,7 +1975,7 @@ static int npf_interception(struct kvm_vcpu *vcpu) u64 fault_address = svm->vmcb->control.exit_info_2; u64 error_code = svm->vmcb->control.exit_info_1; - trace_kvm_page_fault(fault_address, error_code); + trace_kvm_page_fault(vcpu, fault_address, error_code); return kvm_mmu_page_fault(vcpu, fault_address, error_code, static_cpu_has(X86_FEATURE_DECODEASSISTS) ? svm->vmcb->control.insn_bytes : NULL, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 2120d7c060a9..331bdb0ae4b1 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -394,20 +394,26 @@ TRACE_EVENT(kvm_inj_exception, * Tracepoint for page fault. */ TRACE_EVENT(kvm_page_fault, - TP_PROTO(unsigned long fault_address, unsigned int error_code), - TP_ARGS(fault_address, error_code), + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long fault_address, + unsigned int error_code), + TP_ARGS(vcpu, fault_address, error_code), TP_STRUCT__entry( + __field( unsigned int, vcpu_id ) + __field( unsigned long, guest_rip ) __field( unsigned long, fault_address ) __field( unsigned int, error_code ) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->guest_rip = kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), - TP_printk("address %lx error_code %x", + TP_printk("vcpu %u rip 0x%lx address 0x%lx error_code %x", + __entry->vcpu_id, __entry->guest_rip, __entry->fault_address, __entry->error_code) ); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c9b49a09e6b5..3eae41086416 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5638,7 +5638,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - trace_kvm_page_fault(gpa, exit_qualification); + trace_kvm_page_fault(vcpu, gpa, exit_qualification); /* Is it a read fault? */ error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) -- cgit v1.2.3