author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-02 13:22:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-02 13:22:47 -0700 |
commit | cd3eb7efaa995db00db0ba64893814f9831be842 (patch) | |
tree | e6c5d21c5ea43e796a6cc61dde7efcad078888d9 /drivers/iommu/intel/perf.c | |
parent | 35e43538af8fd2cb39d58caca1134a87db173f75 (diff) | |
parent | 2b9d8e3e9a9bb693a8b8bd26ad192db037517759 (diff) | |
Merge tag 'iommu-updates-v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel:
- SMMU Updates from Will Deacon:
- SMMUv3:
- Support stalling faults for platform devices
- Decrease default sizes for the event and PRI queues
- SMMUv2:
- Support for a new '->probe_finalize' hook, needed by Nvidia
- Even more Qualcomm compatible strings
- Avoid Adreno TTBR1 quirk for DB820C platform
- Intel VT-d updates from Lu Baolu:
- Convert Intel IOMMU to use sva_lib helpers in iommu core
- ftrace and debugfs support for page fault handling
- Support asynchronous nested capabilities
- Various misc cleanups
- Support for the new VIOT ACPI table to make the VirtIO IOMMU
available on x86
- Add the amd_iommu=force_enable command line option to enable
the IOMMU on platforms where it is known to cause problems
- Support for version 2 of the Rockchip IOMMU
- Various smaller fixes, cleanups and refactorings
* tag 'iommu-updates-v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (66 commits)
iommu/virtio: Enable x86 support
iommu/dma: Pass address limit rather than size to iommu_setup_dma_ops()
ACPI: Add driver for the VIOT table
ACPI: Move IOMMU setup code out of IORT
ACPI: arm64: Move DMA setup operations out of IORT
iommu/vt-d: Fix dereference of pointer info before it is null checked
iommu: Update "iommu.strict" documentation
iommu/arm-smmu: Check smmu->impl pointer before dereferencing
iommu/arm-smmu-v3: Remove unnecessary oom message
iommu/arm-smmu: Fix arm_smmu_device refcount leak in address translation
iommu/arm-smmu: Fix arm_smmu_device refcount leak when arm_smmu_rpm_get fails
iommu/vt-d: Fix linker error on 32-bit
iommu/vt-d: No need to typecast
iommu/vt-d: Define counter explicitly as unsigned int
iommu/vt-d: Remove unnecessary braces
iommu/vt-d: Removed unused iommu_count in dmar domain
iommu/vt-d: Use bitfields for DMAR capabilities
iommu/vt-d: Use DEVICE_ATTR_RO macro
iommu/vt-d: Fix out-bounds-warning in intel/svm.c
iommu/vt-d: Add PRQ handling latency sampling
...
Diffstat (limited to 'drivers/iommu/intel/perf.c')
-rw-r--r-- | drivers/iommu/intel/perf.c | 166 |
1 file changed, 166 insertions, 0 deletions
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
new file mode 100644
index 000000000000..73b7ec705552
--- /dev/null
+++ b/drivers/iommu/intel/perf.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * perf.c - performance monitor
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ *         Fenghua Yu <fenghua.yu@intel.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/intel-iommu.h>
+
+#include "perf.h"
+
+static DEFINE_SPINLOCK(latency_lock);
+
+bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
+{
+        struct latency_statistic *lstat = iommu->perf_statistic;
+
+        return lstat && lstat[type].enabled;
+}
+
+int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
+{
+        struct latency_statistic *lstat;
+        unsigned long flags;
+        int ret = -EBUSY;
+
+        if (dmar_latency_enabled(iommu, type))
+                return 0;
+
+        spin_lock_irqsave(&latency_lock, flags);
+        if (!iommu->perf_statistic) {
+                iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM,
+                                                GFP_ATOMIC);
+                if (!iommu->perf_statistic) {
+                        ret = -ENOMEM;
+                        goto unlock_out;
+                }
+        }
+
+        lstat = iommu->perf_statistic;
+
+        if (!lstat[type].enabled) {
+                lstat[type].enabled = true;
+                lstat[type].counter[COUNTS_MIN] = UINT_MAX;
+                ret = 0;
+        }
+unlock_out:
+        spin_unlock_irqrestore(&latency_lock, flags);
+
+        return ret;
+}
+
+void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
+{
+        struct latency_statistic *lstat = iommu->perf_statistic;
+        unsigned long flags;
+
+        if (!dmar_latency_enabled(iommu, type))
+                return;
+
+        spin_lock_irqsave(&latency_lock, flags);
+        memset(&lstat[type], 0, sizeof(*lstat) * DMAR_LATENCY_NUM);
+        spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
+{
+        struct latency_statistic *lstat = iommu->perf_statistic;
+        unsigned long flags;
+        u64 min, max;
+
+        if (!dmar_latency_enabled(iommu, type))
+                return;
+
+        spin_lock_irqsave(&latency_lock, flags);
+        if (latency < 100)
+                lstat[type].counter[COUNTS_10e2]++;
+        else if (latency < 1000)
+                lstat[type].counter[COUNTS_10e3]++;
+        else if (latency < 10000)
+                lstat[type].counter[COUNTS_10e4]++;
+        else if (latency < 100000)
+                lstat[type].counter[COUNTS_10e5]++;
+        else if (latency < 1000000)
+                lstat[type].counter[COUNTS_10e6]++;
+        else if (latency < 10000000)
+                lstat[type].counter[COUNTS_10e7]++;
+        else
+                lstat[type].counter[COUNTS_10e8_plus]++;
+
+        min = lstat[type].counter[COUNTS_MIN];
+        max = lstat[type].counter[COUNTS_MAX];
+        lstat[type].counter[COUNTS_MIN] = min_t(u64, min, latency);
+        lstat[type].counter[COUNTS_MAX] = max_t(u64, max, latency);
+        lstat[type].counter[COUNTS_SUM] += latency;
+        lstat[type].samples++;
+        spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+static char *latency_counter_names[] = {
+        " <0.1us",
+        " 0.1us-1us", " 1us-10us", " 10us-100us",
+        " 100us-1ms", " 1ms-10ms", " >=10ms",
+        " min(us)", " max(us)", " average(us)"
+};
+
+static char *latency_type_names[] = {
+        " inv_iotlb", " inv_devtlb", " inv_iec",
+        " svm_prq"
+};
+
+int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+{
+        struct latency_statistic *lstat = iommu->perf_statistic;
+        unsigned long flags;
+        int bytes = 0, i, j;
+
+        memset(str, 0, size);
+
+        for (i = 0; i < COUNTS_NUM; i++)
+                bytes += snprintf(str + bytes, size - bytes,
+                                  "%s", latency_counter_names[i]);
+
+        spin_lock_irqsave(&latency_lock, flags);
+        for (i = 0; i < DMAR_LATENCY_NUM; i++) {
+                if (!dmar_latency_enabled(iommu, i))
+                        continue;
+
+                bytes += snprintf(str + bytes, size - bytes,
+                                  "\n%s", latency_type_names[i]);
+
+                for (j = 0; j < COUNTS_NUM; j++) {
+                        u64 val = lstat[i].counter[j];
+
+                        switch (j) {
+                        case COUNTS_MIN:
+                                if (val == UINT_MAX)
+                                        val = 0;
+                                else
+                                        val = div_u64(val, 1000);
+                                break;
+                        case COUNTS_MAX:
+                                val = div_u64(val, 1000);
+                                break;
+                        case COUNTS_SUM:
+                                if (lstat[i].samples)
+                                        val = div_u64(val, (lstat[i].samples * 1000));
+                                else
+                                        val = 0;
+                                break;
+                        default:
+                                break;
+                        }
+
+                        bytes += snprintf(str + bytes, size - bytes,
+                                          "%12lld", val);
+                }
+        }
+        spin_unlock_irqrestore(&latency_lock, flags);
+
+        return bytes;
+}
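To make the new interface concrete, below is a minimal, hypothetical caller showing the intended enable -> update -> snapshot flow. It is a sketch, not part of this commit: example_prq_measure() is invented for illustration, and it assumes DMAR_LATENCY_PRQ is one of the enum latency_type values declared in perf.h (the " svm_prq" row above suggests a page-request type) and that dmar_latency_update() takes the elapsed time in nanoseconds, since the snapshot code divides by 1000 to report microseconds.

/*
 * Hypothetical caller, for illustration only -- not part of this commit.
 * Assumes DMAR_LATENCY_PRQ is an enum latency_type value from perf.h.
 */
#include <linux/ktime.h>
#include <linux/intel-iommu.h>

#include "perf.h"

static void example_prq_measure(struct intel_iommu *iommu)
{
        char buf[1024];
        u64 start;

        /* Allocate the per-IOMMU counters and arm sampling for one type. */
        if (dmar_latency_enable(iommu, DMAR_LATENCY_PRQ))
                return;

        /* Time one operation; latency is recorded in nanoseconds. */
        start = ktime_get_ns();
        /* ... handle one page request here ... */
        dmar_latency_update(iommu, DMAR_LATENCY_PRQ, ktime_get_ns() - start);

        /* Render the histogram plus min/max/average, e.g. for debugfs. */
        dmar_latency_snapshot(iommu, buf, sizeof(buf));
}

One design point worth noting: all counters for all IOMMUs share the single global latency_lock, taken with irqsave. That keeps the update path simple at the cost of cross-IOMMU contention, which fits sampling/debug infrastructure like this rather than a hot path.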