summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/pmu.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-16 16:15:14 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-16 16:15:14 +0200
commit79e06c4c4950be2abd8ca5d2428a8c915aa62c24 (patch)
tree0507ef82aa3c7766b7b19163a0351882b7d7c5b5 /arch/x86/kvm/pmu.c
parentcb3f09f9afe5286c0aed7a1c5cc71495de166efb (diff)
parentc862dcd199759d4a45e65dab47b03e3e8a144e3a (diff)
downloadlinux-79e06c4c4950be2abd8ca5d2428a8c915aa62c24.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "RISCV: - Use common KVM implementation of MMU memory caches - SBI v0.2 support for Guest - Initial KVM selftests support - Fix to avoid spurious virtual interrupts after clearing hideleg CSR - Update email address for Anup and Atish ARM: - Simplification of the 'vcpu first run' by integrating it into KVM's 'pid change' flow - Refactoring of the FP and SVE state tracking, also leading to a simpler state and less shared data between EL1 and EL2 in the nVHE case - Tidy up the header file usage for the nvhe hyp object - New HYP unsharing mechanism, finally allowing pages to be unmapped from the Stage-1 EL2 page-tables - Various pKVM cleanups around refcounting and sharing - A couple of vgic fixes for bugs that would trigger once the vcpu xarray rework is merged, but not sooner - Add minimal support for ARMv8.7's PMU extension - Rework kvm_pgtable initialisation ahead of the NV work - New selftest for IRQ injection - Teach selftests about the lack of default IPA space and page sizes - Expand sysreg selftest to deal with Pointer Authentication - The usual bunch of cleanups and doc update s390: - fix sigp sense/start/stop/inconsistency - cleanups x86: - Clean up some function prototypes more - improved gfn_to_pfn_cache with proper invalidation, used by Xen emulation - add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery - completely remove potential TOC/TOU races in nested SVM consistency checks - update some PMCs on emulated instructions - Intel AMX support (joint work between Thomas and Intel) - large MMU cleanups - module parameter to disable PMU virtualization - cleanup register cache - first part of halt handling cleanups - Hyper-V enlightened MSR bitmap support for nested hypervisors Generic: - clean up Makefiles - introduce CONFIG_HAVE_KVM_DIRTY_RING - optimize memslot lookup using a tree - optimize vCPU array usage by converting to xarray" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (268 commits) x86/fpu: Fix inline prefix warnings selftest: kvm: Add amx selftest selftest: kvm: Move struct kvm_x86_state to header selftest: kvm: Reorder vcpu_load_state steps for AMX kvm: x86: Disable interception for IA32_XFD on demand x86/fpu: Provide fpu_sync_guest_vmexit_xfd_state() kvm: selftests: Add support for KVM_CAP_XSAVE2 kvm: x86: Add support for getting/setting expanded xstate buffer x86/fpu: Add uabi_size to guest_fpu kvm: x86: Add CPUID support for Intel AMX kvm: x86: Add XCR0 support for Intel AMX kvm: x86: Disable RDMSR interception of IA32_XFD_ERR kvm: x86: Emulate IA32_XFD_ERR for guest kvm: x86: Intercept #NM for saving IA32_XFD_ERR x86/fpu: Prepare xfd_err in struct fpu_guest kvm: x86: Add emulation for IA32_XFD x86/fpu: Provide fpu_update_guest_xfd() for IA32_XFD emulation kvm: x86: Enable dynamic xfeatures at KVM_SET_CPUID2 x86/fpu: Provide fpu_enable_guest_xfd_features() for KVM x86/fpu: Add guest support to xfd_enable_feature() ...
Diffstat (limited to 'arch/x86/kvm/pmu.c')
-rw-r--r--arch/x86/kvm/pmu.c128
1 files changed, 90 insertions, 38 deletions
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 0c2133eb4cf6..261b39cbef6e 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -55,43 +55,41 @@ static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
kvm_pmu_deliver_pmi(vcpu);
}
-static void kvm_perf_overflow(struct perf_event *perf_event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
- struct kvm_pmc *pmc = perf_event->overflow_handler_context;
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
- __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
- kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
- }
+ /* Ignore counters that have been reprogrammed already. */
+ if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
+ return;
+
+ __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+ kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+
+ if (!pmc->intr)
+ return;
+
+ /*
+ * Inject PMI. If vcpu was in a guest mode during NMI PMI
+ * can be ejected on a guest mode re-entry. Otherwise we can't
+ * be sure that vcpu wasn't executing hlt instruction at the
+ * time of vmexit and is not going to re-enter guest mode until
+ * woken up. So we should wake it, but this is impossible from
+ * NMI context. Do it from irq work instead.
+ */
+ if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
+ irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
+ else
+ kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
-static void kvm_perf_overflow_intr(struct perf_event *perf_event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static void kvm_perf_overflow(struct perf_event *perf_event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
- __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
- kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
- /*
- * Inject PMI. If vcpu was in a guest mode during NMI PMI
- * can be ejected on a guest mode re-entry. Otherwise we can't
- * be sure that vcpu wasn't executing hlt instruction at the
- * time of vmexit and is not going to re-enter guest mode until
- * woken up. So we should wake it, but this is impossible from
- * NMI context. Do it from irq work instead.
- */
- if (!kvm_handling_nmi_from_guest(pmc->vcpu))
- irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
- else
- kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
- }
+ __kvm_perf_overflow(pmc, true);
}
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
@@ -126,7 +124,6 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
}
event = perf_event_create_kernel_counter(&attr, -1, current,
- intr ? kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
if (IS_ERR(event)) {
pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
@@ -138,6 +135,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
pmc_to_pmu(pmc)->event_count++;
clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
pmc->is_paused = false;
+ pmc->intr = intr;
}
static void pmc_pause_counter(struct kvm_pmc *pmc)
@@ -174,7 +172,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
unsigned config, type = PERF_TYPE_RAW;
- u8 event_select, unit_mask;
struct kvm *kvm = pmc->vcpu->kvm;
struct kvm_pmu_event_filter *filter;
int i;
@@ -206,17 +203,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
if (!allow_event)
return;
- event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
- unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-
if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
ARCH_PERFMON_EVENTSEL_INV |
ARCH_PERFMON_EVENTSEL_CMASK |
HSW_IN_TX |
HSW_IN_TX_CHECKPOINTED))) {
- config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
- event_select,
- unit_mask);
+ config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
if (config != PERF_COUNT_HW_MAX)
type = PERF_TYPE_HARDWARE;
}
@@ -268,7 +260,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
pmc->current_config = (u64)ctrl;
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
- kvm_x86_ops.pmu_ops->find_fixed_event(idx),
+ kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
!(en_field & 0x2), /* exclude user */
!(en_field & 0x1), /* exclude kernel */
pmi, false, false);
@@ -490,6 +482,66 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
kvm_pmu_reset(vcpu);
}
+static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+ u64 prev_count;
+
+ prev_count = pmc->counter;
+ pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
+
+ reprogram_counter(pmu, pmc->idx);
+ if (pmc->counter < prev_count)
+ __kvm_perf_overflow(pmc, false);
+}
+
+static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
+ unsigned int perf_hw_id)
+{
+ u64 old_eventsel = pmc->eventsel;
+ unsigned int config;
+
+ pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
+ config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
+ pmc->eventsel = old_eventsel;
+ return config == perf_hw_id;
+}
+
+static inline bool cpl_is_matched(struct kvm_pmc *pmc)
+{
+ bool select_os, select_user;
+ u64 config = pmc->current_config;
+
+ if (pmc_is_gp(pmc)) {
+ select_os = config & ARCH_PERFMON_EVENTSEL_OS;
+ select_user = config & ARCH_PERFMON_EVENTSEL_USR;
+ } else {
+ select_os = config & 0x1;
+ select_user = config & 0x2;
+ }
+
+ return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
+}
+
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
+ int i;
+
+ for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+ pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);
+
+ if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
+ continue;
+
+ /* Ignore checks for edge detect, pin control, invert and CMASK bits */
+ if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
+ kvm_pmu_incr_counter(pmc);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
+
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
struct kvm_pmu_event_filter tmp, *filter;