KVM: x86/mmu: Make HVA handler retpoline-friendly

When retpolines are enabled they have high overhead in the inner loop inside kvm_handle_hva_range() that iterates over the provided memory area. Let's mark this function and its TDP MMU equivalent __always_inline so compiler will be able to change the call to the actual handler function inside each of them into a direct one. This significantly improves performance on the unmap test on the existing kernel memslot code (tested on a Xeon 8167M machine): 30 slots in use: Test Before After Improvement Unmap 0.0353s 0.0334s 5% Unmap 2M 0.00104s 0.000407s 61% 509 slots in use: Test Before After Improvement Unmap 0.0742s 0.0740s None Unmap 2M 0.00221s 0.00159s 28% Looks like having an indirect call in these functions (and, so, a retpoline) might have interfered with unrolling of the whole loop in the CPU. Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> Message-Id: <732d3fe9eb68aa08402a638ab0309199fa89ae56.1612810129.git.maciej.szmigiero@oracle.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> 2021-02-08 19:51:32 +0100
committer: Paolo Bonzini <pbonzini@redhat.com> 2021-02-09 08:42:09 -0500
commit: 8f5c44f953d36f8c1aea6d57eb3251e3640f4dad (patch)
tree: 39df906f8c5e159a51562d08d0a58c656e60b7ad /arch
parent: b9ce0f86d91942596bce0267bfa7db3bbd7b434f (diff)
download: linux-8f5c44f953d36f8c1aea6d57eb3251e3640f4dad.tar.bz2
2 files changed, 22 insertions, 15 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index bebb66c6d068..e507568cd55d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1457,16 +1457,17 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
 	     slot_rmap_walk_okay(_iter_);				\
 	     slot_rmap_walk_next(_iter_))
 
-static int kvm_handle_hva_range(struct kvm *kvm,
-				unsigned long start,
-				unsigned long end,
-				unsigned long data,
-				int (*handler)(struct kvm *kvm,
-					       struct kvm_rmap_head *rmap_head,
-					       struct kvm_memory_slot *slot,
-					       gfn_t gfn,
-					       int level,
-					       unsigned long data))
+static __always_inline int
+kvm_handle_hva_range(struct kvm *kvm,
+		     unsigned long start,
+		     unsigned long end,
+		     unsigned long data,
+		     int (*handler)(struct kvm *kvm,
+				    struct kvm_rmap_head *rmap_head,
+				    struct kvm_memory_slot *slot,
+				    gfn_t gfn,
+				    int level,
+				    unsigned long data))
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index ec99e50a7ba7..71e100a5670f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -879,11 +879,17 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	return ret;
 }
 
-static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
-		unsigned long end, unsigned long data,
-		int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
-			       struct kvm_mmu_page *root, gfn_t start,
-			       gfn_t end, unsigned long data))
+static __always_inline int
+kvm_tdp_mmu_handle_hva_range(struct kvm *kvm,
+			     unsigned long start,
+			     unsigned long end,
+			     unsigned long data,
+			     int (*handler)(struct kvm *kvm,
+					    struct kvm_memory_slot *slot,
+					    struct kvm_mmu_page *root,
+					    gfn_t start,
+					    gfn_t end,
+					    unsigned long data))
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
author	Maciej S. Szmigiero <maciej.szmigiero@oracle.com>	2021-02-08 19:51:32 +0100
committer	Paolo Bonzini <pbonzini@redhat.com>	2021-02-09 08:42:09 -0500
commit	8f5c44f953d36f8c1aea6d57eb3251e3640f4dad (patch)
tree	39df906f8c5e159a51562d08d0a58c656e60b7ad /arch
parent	b9ce0f86d91942596bce0267bfa7db3bbd7b434f (diff)
download	linux-8f5c44f953d36f8c1aea6d57eb3251e3640f4dad.tar.bz2