Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini: "RISCV: - Use common KVM implementation of MMU memory caches - SBI v0.2 support for Guest - Initial KVM selftests support - Fix to avoid spurious virtual interrupts after clearing hideleg CSR - Update email address for Anup and Atish ARM: - Simplification of the 'vcpu first run' by integrating it into KVM's 'pid change' flow - Refactoring of the FP and SVE state tracking, also leading to a simpler state and less shared data between EL1 and EL2 in the nVHE case - Tidy up the header file usage for the nvhe hyp object - New HYP unsharing mechanism, finally allowing pages to be unmapped from the Stage-1 EL2 page-tables - Various pKVM cleanups around refcounting and sharing - A couple of vgic fixes for bugs that would trigger once the vcpu xarray rework is merged, but not sooner - Add minimal support for ARMv8.7's PMU extension - Rework kvm_pgtable initialisation ahead of the NV work - New selftest for IRQ injection - Teach selftests about the lack of default IPA space and page sizes - Expand sysreg selftest to deal with Pointer Authentication - The usual bunch of cleanups and doc update s390: - fix sigp sense/start/stop/inconsistency - cleanups x86: - Clean up some function prototypes more - improved gfn_to_pfn_cache with proper invalidation, used by Xen emulation - add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery - completely remove potential TOC/TOU races in nested SVM consistency checks - update some PMCs on emulated instructions - Intel AMX support (joint work between Thomas and Intel) - large MMU cleanups - module parameter to disable PMU virtualization - cleanup register cache - first part of halt handling cleanups - Hyper-V enlightened MSR bitmap support for nested hypervisors Generic: - clean up Makefiles - introduce CONFIG_HAVE_KVM_DIRTY_RING - optimize memslot lookup using a tree - optimize vCPU array usage by converting to xarray" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (268 commits) x86/fpu: Fix inline prefix warnings selftest: kvm: Add amx selftest selftest: kvm: Move struct kvm_x86_state to header selftest: kvm: Reorder vcpu_load_state steps for AMX kvm: x86: Disable interception for IA32_XFD on demand x86/fpu: Provide fpu_sync_guest_vmexit_xfd_state() kvm: selftests: Add support for KVM_CAP_XSAVE2 kvm: x86: Add support for getting/setting expanded xstate buffer x86/fpu: Add uabi_size to guest_fpu kvm: x86: Add CPUID support for Intel AMX kvm: x86: Add XCR0 support for Intel AMX kvm: x86: Disable RDMSR interception of IA32_XFD_ERR kvm: x86: Emulate IA32_XFD_ERR for guest kvm: x86: Intercept #NM for saving IA32_XFD_ERR x86/fpu: Prepare xfd_err in struct fpu_guest kvm: x86: Add emulation for IA32_XFD x86/fpu: Provide fpu_update_guest_xfd() for IA32_XFD emulation kvm: x86: Enable dynamic xfeatures at KVM_SET_CPUID2 x86/fpu: Provide fpu_enable_guest_xfd_features() for KVM x86/fpu: Add guest support to xfd_enable_feature() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-01-16 16:15:14 +0200
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-01-16 16:15:14 +0200
commit: 79e06c4c4950be2abd8ca5d2428a8c915aa62c24 (patch)
tree: 0507ef82aa3c7766b7b19163a0351882b7d7c5b5 /arch/x86/kvm/pmu.c
parent: cb3f09f9afe5286c0aed7a1c5cc71495de166efb (diff)
parent: c862dcd199759d4a45e65dab47b03e3e8a144e3a (diff)
download: linux-79e06c4c4950be2abd8ca5d2428a8c915aa62c24.tar.bz2
1 files changed, 90 insertions, 38 deletions
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 0c2133eb4cf6..261b39cbef6e 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -55,43 +55,41 @@ static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
 	kvm_pmu_deliver_pmi(vcpu);
 }
 
-static void kvm_perf_overflow(struct perf_event *perf_event,
-			      struct perf_sample_data *data,
-			      struct pt_regs *regs)
+static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 {
-	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 
-	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
-		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
-		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
-	}
+	/* Ignore counters that have been reprogrammed already. */
+	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
+		return;
+
+	__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+
+	if (!pmc->intr)
+		return;
+
+	/*
+	 * Inject PMI. If vcpu was in a guest mode during NMI PMI
+	 * can be ejected on a guest mode re-entry. Otherwise we can't
+	 * be sure that vcpu wasn't executing hlt instruction at the
+	 * time of vmexit and is not going to re-enter guest mode until
+	 * woken up. So we should wake it, but this is impossible from
+	 * NMI context. Do it from irq work instead.
+	 */
+	if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
+		irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
+	else
+		kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
 }
 
-static void kvm_perf_overflow_intr(struct perf_event *perf_event,
-				   struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static void kvm_perf_overflow(struct perf_event *perf_event,
+			      struct perf_sample_data *data,
+			      struct pt_regs *regs)
 {
 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
-	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
-	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
-		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
-		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 
-		/*
-		 * Inject PMI. If vcpu was in a guest mode during NMI PMI
-		 * can be ejected on a guest mode re-entry. Otherwise we can't
-		 * be sure that vcpu wasn't executing hlt instruction at the
-		 * time of vmexit and is not going to re-enter guest mode until
-		 * woken up. So we should wake it, but this is impossible from
-		 * NMI context. Do it from irq work instead.
-		 */
-		if (!kvm_handling_nmi_from_guest(pmc->vcpu))
-			irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
-		else
-			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
-	}
+	__kvm_perf_overflow(pmc, true);
 }
 
 static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
@@ -126,7 +124,6 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 	}
 
 	event = perf_event_create_kernel_counter(&attr, -1, current,
-						 intr ? kvm_perf_overflow_intr :
 						 kvm_perf_overflow, pmc);
 	if (IS_ERR(event)) {
 		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
@@ -138,6 +135,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 	pmc_to_pmu(pmc)->event_count++;
 	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 	pmc->is_paused = false;
+	pmc->intr = intr;
 }
 
 static void pmc_pause_counter(struct kvm_pmc *pmc)
@@ -174,7 +172,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 {
 	unsigned config, type = PERF_TYPE_RAW;
-	u8 event_select, unit_mask;
 	struct kvm *kvm = pmc->vcpu->kvm;
 	struct kvm_pmu_event_filter *filter;
 	int i;
@@ -206,17 +203,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 	if (!allow_event)
 		return;
 
-	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
-	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-
 	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
 			  ARCH_PERFMON_EVENTSEL_INV |
 			  ARCH_PERFMON_EVENTSEL_CMASK |
 			  HSW_IN_TX |
 			  HSW_IN_TX_CHECKPOINTED))) {
-		config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
-						      event_select,
-						      unit_mask);
+		config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
 		if (config != PERF_COUNT_HW_MAX)
 			type = PERF_TYPE_HARDWARE;
 	}
@@ -268,7 +260,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 
 	pmc->current_config = (u64)ctrl;
 	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
-			      kvm_x86_ops.pmu_ops->find_fixed_event(idx),
+			      kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
 			      !(en_field & 0x2), /* exclude user */
 			      !(en_field & 0x1), /* exclude kernel */
 			      pmi, false, false);
@@ -490,6 +482,66 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 	kvm_pmu_reset(vcpu);
 }
 
+static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+	u64 prev_count;
+
+	prev_count = pmc->counter;
+	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
+
+	reprogram_counter(pmu, pmc->idx);
+	if (pmc->counter < prev_count)
+		__kvm_perf_overflow(pmc, false);
+}
+
+static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
+	unsigned int perf_hw_id)
+{
+	u64 old_eventsel = pmc->eventsel;
+	unsigned int config;
+
+	pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
+	config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
+	pmc->eventsel = old_eventsel;
+	return config == perf_hw_id;
+}
+
+static inline bool cpl_is_matched(struct kvm_pmc *pmc)
+{
+	bool select_os, select_user;
+	u64 config = pmc->current_config;
+
+	if (pmc_is_gp(pmc)) {
+		select_os = config & ARCH_PERFMON_EVENTSEL_OS;
+		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
+	} else {
+		select_os = config & 0x1;
+		select_user = config & 0x2;
+	}
+
+	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
+}
+
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	int i;
+
+	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);
+
+		if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
+			continue;
+
+		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
+		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
+			kvm_pmu_incr_counter(pmc);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
+
 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_pmu_event_filter tmp, *filter;
author	Linus Torvalds <torvalds@linux-foundation.org>	2022-01-16 16:15:14 +0200
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-01-16 16:15:14 +0200
commit	79e06c4c4950be2abd8ca5d2428a8c915aa62c24 (patch)
tree	0507ef82aa3c7766b7b19163a0351882b7d7c5b5 /arch/x86/kvm/pmu.c
parent	cb3f09f9afe5286c0aed7a1c5cc71495de166efb (diff)
parent	c862dcd199759d4a45e65dab47b03e3e8a144e3a (diff)
download	linux-79e06c4c4950be2abd8ca5d2428a8c915aa62c24.tar.bz2