From ed59dfd9509d172e4920994ed9cbebf93b0050cc Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 23 May 2022 19:31:25 +0800 Subject: asm-generic: Add memory barrier dma_mb() The memory barrier dma_mb() is introduced by commit a76a37777f2c ("iommu/arm-smmu-v3: Ensure queue is read after updating prod pointer"), which is used to ensure that prior (both reads and writes) accesses to memory by a CPU are ordered w.r.t. a subsequent MMIO write. Reviewed-by: Arnd Bergmann # for asm-generic Signed-off-by: Kefeng Wang Reviewed-by: Marco Elver Link: https://lore.kernel.org/r/20220523113126.171714-2-wangkefeng.wang@huawei.com Signed-off-by: Will Deacon --- include/asm-generic/barrier.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index fd7e8fbaeef1..961f4d88f9ef 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -38,6 +38,10 @@ #define wmb() do { kcsan_wmb(); __wmb(); } while (0) #endif +#ifdef __dma_mb +#define dma_mb() do { kcsan_mb(); __dma_mb(); } while (0) +#endif + #ifdef __dma_rmb #define dma_rmb() do { kcsan_rmb(); __dma_rmb(); } while (0) #endif @@ -65,6 +69,10 @@ #define wmb() mb() #endif +#ifndef dma_mb +#define dma_mb() mb() +#endif + #ifndef dma_rmb #define dma_rmb() rmb() #endif -- cgit v1.2.3 From abc5992b9dd0ba599f7a91d5a0f4569a78ae88ac Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 7 Jun 2022 20:50:23 +0800 Subject: mm: ioremap: Use more sensible name in ioremap_prot() Use more meaningful and sensible naming phys_addr instead addr in ioremap_prot(). Suggested-by: Andrew Morton Reviewed-by: Anshuman Khandual Signed-off-by: Kefeng Wang Reviewed-by: Baoquan He Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610092255.32445-1-wangkefeng.wang@huawei.com Signed-off-by: Will Deacon --- include/asm-generic/io.h | 3 ++- mm/ioremap.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 7ce93aaf69f8..b76379628a02 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -964,7 +964,8 @@ static inline void iounmap(volatile void __iomem *addr) #elif defined(CONFIG_GENERIC_IOREMAP) #include -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot); +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot); void iounmap(volatile void __iomem *addr); static inline void __iomem *ioremap(phys_addr_t addr, size_t size) diff --git a/mm/ioremap.c b/mm/ioremap.c index 5fe598ecd9b7..2d754b48d230 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -11,20 +11,21 @@ #include #include -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot) { unsigned long offset, vaddr; phys_addr_t last_addr; struct vm_struct *area; /* Disallow wrap-around or zero size */ - last_addr = addr + size - 1; - if (!size || last_addr < addr) + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) return NULL; /* Page-align mappings */ - offset = addr & (~PAGE_MASK); - addr -= offset; + offset = phys_addr & (~PAGE_MASK); + phys_addr -= offset; size = PAGE_ALIGN(size + offset); area = get_vm_area_caller(size, VM_IOREMAP, @@ -33,7 +34,8 @@ void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) return NULL; vaddr = (unsigned long)area->addr; - if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) { + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, + __pgprot(prot))) { free_vm_area(area); return NULL; } -- cgit v1.2.3 From 18e780b4e6ab89a3a10f46d151971863562c1c91 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 7 Jun 2022 20:50:25 +0800 Subject: mm: ioremap: Add ioremap/iounmap_allowed() Add special hook for architecture to verify addr, size or prot when ioremap() or iounmap(), which will make the generic ioremap more useful. ioremap_allowed() return a bool, - true means continue to remap - false means skip remap and return directly iounmap_allowed() return a bool, - true means continue to vunmap - false code means skip vunmap and return directly Meanwhile, only vunmap the address when it is in vmalloc area as the generic ioremap only returns vmalloc addresses. Acked-by: Andrew Morton Signed-off-by: Kefeng Wang Reviewed-by: Christoph Hellwig Reviewed-by: Baoquan He Link: https://lore.kernel.org/r/20220607125027.44946-5-wangkefeng.wang@huawei.com Signed-off-by: Will Deacon --- include/asm-generic/io.h | 26 ++++++++++++++++++++++++++ mm/ioremap.c | 11 ++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index b76379628a02..db5b890eaff7 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -964,6 +964,32 @@ static inline void iounmap(volatile void __iomem *addr) #elif defined(CONFIG_GENERIC_IOREMAP) #include +/* + * Arch code can implement the following two hooks when using GENERIC_IOREMAP + * ioremap_allowed() return a bool, + * - true means continue to remap + * - false means skip remap and return directly + * iounmap_allowed() return a bool, + * - true means continue to vunmap + * - false means skip vunmap and return directly + */ +#ifndef ioremap_allowed +#define ioremap_allowed ioremap_allowed +static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size, + unsigned long prot) +{ + return true; +} +#endif + +#ifndef iounmap_allowed +#define iounmap_allowed iounmap_allowed +static inline bool iounmap_allowed(void *addr) +{ + return true; +} +#endif + void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, unsigned long prot); void iounmap(volatile void __iomem *addr); diff --git a/mm/ioremap.c b/mm/ioremap.c index e1d008e8f87f..8652426282cc 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -28,6 +28,9 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, phys_addr -= offset; size = PAGE_ALIGN(size + offset); + if (!ioremap_allowed(phys_addr, size, prot)) + return NULL; + area = get_vm_area_caller(size, VM_IOREMAP, __builtin_return_address(0)); if (!area) @@ -47,6 +50,12 @@ EXPORT_SYMBOL(ioremap_prot); void iounmap(volatile void __iomem *addr) { - vunmap((void *)((unsigned long)addr & PAGE_MASK)); + void *vaddr = (void *)((unsigned long)addr & PAGE_MASK); + + if (!iounmap_allowed(vaddr)) + return; + + if (is_vmalloc_addr(vaddr)) + vunmap(vaddr); } EXPORT_SYMBOL(iounmap); -- cgit v1.2.3 From e9a023f2b73ac35ff5cfbefe8524c64d8173d65f Mon Sep 17 00:00:00 2001 From: Eric Lin Date: Tue, 5 Jul 2022 17:19:20 +0800 Subject: drivers/perf: riscv_pmu: Add riscv pmu pm notifier Currently, when the CPU is doing suspend to ram, we don't save pmu counter register and its content will be lost. To ensure perf profiling is not affected by suspend to ram, this patch is based on arm_pmu CPU_PM notifier and implements riscv pmu pm notifier. In the pm notifier, we stop the counter and update the counter value before suspend and start the counter after resume. Signed-off-by: Eric Lin Link: https://lore.kernel.org/r/20220705091920.27432-1-eric.lin@sifive.com Signed-off-by: Will Deacon --- drivers/perf/riscv_pmu.c | 4 +-- drivers/perf/riscv_pmu_sbi.c | 81 +++++++++++++++++++++++++++++++++++++++--- include/linux/perf/riscv_pmu.h | 4 +++ 3 files changed, 83 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index b2b8d2074ed0..2c961839903d 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -121,7 +121,7 @@ u64 riscv_pmu_event_update(struct perf_event *event) return delta; } -static void riscv_pmu_stop(struct perf_event *event, int flags) +void riscv_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); @@ -175,7 +175,7 @@ int riscv_pmu_event_set_period(struct perf_event *event) return overflow; } -static void riscv_pmu_start(struct perf_event *event, int flags) +void riscv_pmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index dca3537a8dcc..adc910e0f1e2 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -693,6 +694,73 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde return 0; } +#ifdef CONFIG_CPU_PM +static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, + void *v) +{ + struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS); + struct perf_event *event; + int idx; + + if (!enabled) + return NOTIFY_OK; + + for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) { + event = cpuc->events[idx]; + if (!event) + continue; + + switch (cmd) { + case CPU_PM_ENTER: + /* + * Stop and update the counter + */ + riscv_pmu_stop(event, PERF_EF_UPDATE); + break; + case CPU_PM_EXIT: + case CPU_PM_ENTER_FAILED: + /* + * Restore and enable the counter. + * + * Requires RCU read locking to be functional, + * wrap the call within RCU_NONIDLE to make the + * RCU subsystem aware this cpu is not idle from + * an RCU perspective for the riscv_pmu_start() call + * duration. + */ + RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD)); + break; + default: + break; + } + } + + return NOTIFY_OK; +} + +static int riscv_pm_pmu_register(struct riscv_pmu *pmu) +{ + pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify; + return cpu_pm_register_notifier(&pmu->riscv_pm_nb); +} + +static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) +{ + cpu_pm_unregister_notifier(&pmu->riscv_pm_nb); +} +#else +static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; } +static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { } +#endif + +static void riscv_pmu_destroy(struct riscv_pmu *pmu) +{ + riscv_pm_pmu_unregister(pmu); + cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); +} + static int pmu_sbi_device_probe(struct platform_device *pdev) { struct riscv_pmu *pmu = NULL; @@ -733,14 +801,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) return ret; + ret = riscv_pm_pmu_register(pmu); + if (ret) + goto out_unregister; + ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); - if (ret) { - cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); - return ret; - } + if (ret) + goto out_unregister; return 0; +out_unregister: + riscv_pmu_destroy(pmu); + out_free: kfree(pmu); return ret; diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 46f9b6fe306e..bf66fe011fa8 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -56,9 +56,13 @@ struct riscv_pmu { struct cpu_hw_events __percpu *hw_events; struct hlist_node node; + struct notifier_block riscv_pm_nb; }; #define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) + +void riscv_pmu_start(struct perf_event *event, int flags); +void riscv_pmu_stop(struct perf_event *event, int flags); unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); int riscv_pmu_event_set_period(struct perf_event *event); uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); -- cgit v1.2.3 From 66637ab137b44914356a9dc7a9b3f8ebcf0b0695 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Tue, 28 Jun 2022 14:34:19 +0800 Subject: drivers/perf: hisi: add driver for HNS3 PMU HNS3(HiSilicon Network System 3) PMU is RCiEP device in HiSilicon SoC NIC, supports collection of performance statistics such as bandwidth, latency, packet rate and interrupt rate. NIC of each SICL has one PMU device for it. Driver registers each PMU device to perf, and exports information of supported events, filter mode of each event, bdf range, hardware clock frequency, identifier and so on via sysfs. Each PMU device has its own registers of control, counters and interrupt, and it supports 8 hardware events, each hardward event has its own registers for configuration, counters and interrupt. Filter options contains: config - select event port - select physical port of nic tc - select tc(must be used with port) func - select PF/VF queue - select queue of PF/VF(must be used with func) intr - select interrupt number(must be used with func) global - select all functions of IO DIE Signed-off-by: Guangbin Huang Reviewed-by: John Garry Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20220628063419.38514-3-huangguangbin2@huawei.com Signed-off-by: Will Deacon --- MAINTAINERS | 6 + drivers/perf/hisilicon/Kconfig | 10 + drivers/perf/hisilicon/Makefile | 1 + drivers/perf/hisilicon/hns3_pmu.c | 1671 +++++++++++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 5 files changed, 1689 insertions(+) create mode 100644 drivers/perf/hisilicon/hns3_pmu.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 3cf9842d9233..6fc770ed6292 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8944,6 +8944,12 @@ F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst F: Documentation/admin-guide/perf/hisi-pmu.rst F: drivers/perf/hisilicon +HISILICON HNS3 PMU DRIVER +M: Guangbin Huang +S: Supported +F: Documentation/admin-guide/perf/hns3-pmu.rst +F: drivers/perf/hisilicon/hns3_pmu.c + HISILICON QM AND ZIP Controller DRIVER M: Zhou Wang L: linux-crypto@vger.kernel.org diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig index 5546218b5598..171bfc1b6bc2 100644 --- a/drivers/perf/hisilicon/Kconfig +++ b/drivers/perf/hisilicon/Kconfig @@ -14,3 +14,13 @@ config HISI_PCIE_PMU RCiEP devices. Adds the PCIe PMU into perf events system for monitoring latency, bandwidth etc. + +config HNS3_PMU + tristate "HNS3 PERF PMU" + depends on ARM64 || COMPILE_TEST + depends on PCI + help + Provide support for HNS3 performance monitoring unit (PMU) RCiEP + devices. + Adds the HNS3 PMU into perf events system for monitoring latency, + bandwidth etc. diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 6be83517acaa..4d2c9abe3372 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o +obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c new file mode 100644 index 000000000000..e0457d84af6b --- /dev/null +++ b/drivers/perf/hisilicon/hns3_pmu.c @@ -0,0 +1,1671 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This driver adds support for HNS3 PMU iEP device. Related perf events are + * bandwidth, latency, packet rate, interrupt rate etc. + * + * Copyright (C) 2022 HiSilicon Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* registers offset address */ +#define HNS3_PMU_REG_GLOBAL_CTRL 0x0000 +#define HNS3_PMU_REG_CLOCK_FREQ 0x0020 +#define HNS3_PMU_REG_BDF 0x0fe0 +#define HNS3_PMU_REG_VERSION 0x0fe4 +#define HNS3_PMU_REG_DEVICE_ID 0x0fe8 + +#define HNS3_PMU_REG_EVENT_OFFSET 0x1000 +#define HNS3_PMU_REG_EVENT_SIZE 0x1000 +#define HNS3_PMU_REG_EVENT_CTRL_LOW 0x00 +#define HNS3_PMU_REG_EVENT_CTRL_HIGH 0x04 +#define HNS3_PMU_REG_EVENT_INTR_STATUS 0x08 +#define HNS3_PMU_REG_EVENT_INTR_MASK 0x0c +#define HNS3_PMU_REG_EVENT_COUNTER 0x10 +#define HNS3_PMU_REG_EVENT_EXT_COUNTER 0x18 +#define HNS3_PMU_REG_EVENT_QID_CTRL 0x28 +#define HNS3_PMU_REG_EVENT_QID_PARA 0x2c + +#define HNS3_PMU_FILTER_SUPPORT_GLOBAL BIT(0) +#define HNS3_PMU_FILTER_SUPPORT_PORT BIT(1) +#define HNS3_PMU_FILTER_SUPPORT_PORT_TC BIT(2) +#define HNS3_PMU_FILTER_SUPPORT_FUNC BIT(3) +#define HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE BIT(4) +#define HNS3_PMU_FILTER_SUPPORT_FUNC_INTR BIT(5) + +#define HNS3_PMU_FILTER_ALL_TC 0xf +#define HNS3_PMU_FILTER_ALL_QUEUE 0xffff + +#define HNS3_PMU_CTRL_SUBEVENT_S 4 +#define HNS3_PMU_CTRL_FILTER_MODE_S 24 + +#define HNS3_PMU_GLOBAL_START BIT(0) + +#define HNS3_PMU_EVENT_STATUS_RESET BIT(11) +#define HNS3_PMU_EVENT_EN BIT(12) +#define HNS3_PMU_EVENT_OVERFLOW_RESTART BIT(15) + +#define HNS3_PMU_QID_PARA_FUNC_S 0 +#define HNS3_PMU_QID_PARA_QUEUE_S 16 + +#define HNS3_PMU_QID_CTRL_REQ_ENABLE BIT(0) +#define HNS3_PMU_QID_CTRL_DONE BIT(1) +#define HNS3_PMU_QID_CTRL_MISS BIT(2) + +#define HNS3_PMU_INTR_MASK_OVERFLOW BIT(1) + +#define HNS3_PMU_MAX_HW_EVENTS 8 + +/* + * Each hardware event contains two registers (counter and ext_counter) for + * bandwidth, packet rate, latency and interrupt rate. These two registers will + * be triggered to run at the same when a hardware event is enabled. The meaning + * of counter and ext_counter of different event type are different, their + * meaning show as follow: + * + * +----------------+------------------+---------------+ + * | event type | counter | ext_counter | + * +----------------+------------------+---------------+ + * | bandwidth | byte number | cycle number | + * +----------------+------------------+---------------+ + * | packet rate | packet number | cycle number | + * +----------------+------------------+---------------+ + * | latency | cycle number | packet number | + * +----------------+------------------+---------------+ + * | interrupt rate | interrupt number | cycle number | + * +----------------+------------------+---------------+ + * + * The cycle number indicates increment of counter of hardware timer, the + * frequency of hardware timer can be read from hw_clk_freq file. + * + * Performance of each hardware event is calculated by: counter / ext_counter. + * + * Since processing of data is preferred to be done in userspace, we expose + * ext_counter as a separate event for userspace and use bit 16 to indicate it. + * For example, event 0x00001 and 0x10001 are actually one event for hardware + * because bit 0-15 are same. If the bit 16 of one event is 0 means to read + * counter register, otherwise means to read ext_counter register. + */ +/* bandwidth events */ +#define HNS3_PMU_EVT_BW_SSU_EGU_BYTE_NUM 0x00001 +#define HNS3_PMU_EVT_BW_SSU_EGU_TIME 0x10001 +#define HNS3_PMU_EVT_BW_SSU_RPU_BYTE_NUM 0x00002 +#define HNS3_PMU_EVT_BW_SSU_RPU_TIME 0x10002 +#define HNS3_PMU_EVT_BW_SSU_ROCE_BYTE_NUM 0x00003 +#define HNS3_PMU_EVT_BW_SSU_ROCE_TIME 0x10003 +#define HNS3_PMU_EVT_BW_ROCE_SSU_BYTE_NUM 0x00004 +#define HNS3_PMU_EVT_BW_ROCE_SSU_TIME 0x10004 +#define HNS3_PMU_EVT_BW_TPU_SSU_BYTE_NUM 0x00005 +#define HNS3_PMU_EVT_BW_TPU_SSU_TIME 0x10005 +#define HNS3_PMU_EVT_BW_RPU_RCBRX_BYTE_NUM 0x00006 +#define HNS3_PMU_EVT_BW_RPU_RCBRX_TIME 0x10006 +#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_BYTE_NUM 0x00008 +#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_TIME 0x10008 +#define HNS3_PMU_EVT_BW_WR_FBD_BYTE_NUM 0x00009 +#define HNS3_PMU_EVT_BW_WR_FBD_TIME 0x10009 +#define HNS3_PMU_EVT_BW_WR_EBD_BYTE_NUM 0x0000a +#define HNS3_PMU_EVT_BW_WR_EBD_TIME 0x1000a +#define HNS3_PMU_EVT_BW_RD_FBD_BYTE_NUM 0x0000b +#define HNS3_PMU_EVT_BW_RD_FBD_TIME 0x1000b +#define HNS3_PMU_EVT_BW_RD_EBD_BYTE_NUM 0x0000c +#define HNS3_PMU_EVT_BW_RD_EBD_TIME 0x1000c +#define HNS3_PMU_EVT_BW_RD_PAY_M0_BYTE_NUM 0x0000d +#define HNS3_PMU_EVT_BW_RD_PAY_M0_TIME 0x1000d +#define HNS3_PMU_EVT_BW_RD_PAY_M1_BYTE_NUM 0x0000e +#define HNS3_PMU_EVT_BW_RD_PAY_M1_TIME 0x1000e +#define HNS3_PMU_EVT_BW_WR_PAY_M0_BYTE_NUM 0x0000f +#define HNS3_PMU_EVT_BW_WR_PAY_M0_TIME 0x1000f +#define HNS3_PMU_EVT_BW_WR_PAY_M1_BYTE_NUM 0x00010 +#define HNS3_PMU_EVT_BW_WR_PAY_M1_TIME 0x10010 + +/* packet rate events */ +#define HNS3_PMU_EVT_PPS_IGU_SSU_PACKET_NUM 0x00100 +#define HNS3_PMU_EVT_PPS_IGU_SSU_TIME 0x10100 +#define HNS3_PMU_EVT_PPS_SSU_EGU_PACKET_NUM 0x00101 +#define HNS3_PMU_EVT_PPS_SSU_EGU_TIME 0x10101 +#define HNS3_PMU_EVT_PPS_SSU_RPU_PACKET_NUM 0x00102 +#define HNS3_PMU_EVT_PPS_SSU_RPU_TIME 0x10102 +#define HNS3_PMU_EVT_PPS_SSU_ROCE_PACKET_NUM 0x00103 +#define HNS3_PMU_EVT_PPS_SSU_ROCE_TIME 0x10103 +#define HNS3_PMU_EVT_PPS_ROCE_SSU_PACKET_NUM 0x00104 +#define HNS3_PMU_EVT_PPS_ROCE_SSU_TIME 0x10104 +#define HNS3_PMU_EVT_PPS_TPU_SSU_PACKET_NUM 0x00105 +#define HNS3_PMU_EVT_PPS_TPU_SSU_TIME 0x10105 +#define HNS3_PMU_EVT_PPS_RPU_RCBRX_PACKET_NUM 0x00106 +#define HNS3_PMU_EVT_PPS_RPU_RCBRX_TIME 0x10106 +#define HNS3_PMU_EVT_PPS_RCBTX_TPU_PACKET_NUM 0x00107 +#define HNS3_PMU_EVT_PPS_RCBTX_TPU_TIME 0x10107 +#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_PACKET_NUM 0x00108 +#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_TIME 0x10108 +#define HNS3_PMU_EVT_PPS_WR_FBD_PACKET_NUM 0x00109 +#define HNS3_PMU_EVT_PPS_WR_FBD_TIME 0x10109 +#define HNS3_PMU_EVT_PPS_WR_EBD_PACKET_NUM 0x0010a +#define HNS3_PMU_EVT_PPS_WR_EBD_TIME 0x1010a +#define HNS3_PMU_EVT_PPS_RD_FBD_PACKET_NUM 0x0010b +#define HNS3_PMU_EVT_PPS_RD_FBD_TIME 0x1010b +#define HNS3_PMU_EVT_PPS_RD_EBD_PACKET_NUM 0x0010c +#define HNS3_PMU_EVT_PPS_RD_EBD_TIME 0x1010c +#define HNS3_PMU_EVT_PPS_RD_PAY_M0_PACKET_NUM 0x0010d +#define HNS3_PMU_EVT_PPS_RD_PAY_M0_TIME 0x1010d +#define HNS3_PMU_EVT_PPS_RD_PAY_M1_PACKET_NUM 0x0010e +#define HNS3_PMU_EVT_PPS_RD_PAY_M1_TIME 0x1010e +#define HNS3_PMU_EVT_PPS_WR_PAY_M0_PACKET_NUM 0x0010f +#define HNS3_PMU_EVT_PPS_WR_PAY_M0_TIME 0x1010f +#define HNS3_PMU_EVT_PPS_WR_PAY_M1_PACKET_NUM 0x00110 +#define HNS3_PMU_EVT_PPS_WR_PAY_M1_TIME 0x10110 +#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_PACKET_NUM 0x00111 +#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_TIME 0x10111 +#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_PACKET_NUM 0x00112 +#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_TIME 0x10112 + +/* latency events */ +#define HNS3_PMU_EVT_DLY_TX_PUSH_TIME 0x00202 +#define HNS3_PMU_EVT_DLY_TX_PUSH_PACKET_NUM 0x10202 +#define HNS3_PMU_EVT_DLY_TX_TIME 0x00204 +#define HNS3_PMU_EVT_DLY_TX_PACKET_NUM 0x10204 +#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_TIME 0x00206 +#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_PACKET_NUM 0x10206 +#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_TIME 0x00207 +#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_PACKET_NUM 0x10207 +#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_TIME 0x00208 +#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_PACKET_NUM 0x10208 +#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_TIME 0x00209 +#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_PACKET_NUM 0x10209 +#define HNS3_PMU_EVT_DLY_RPU_TIME 0x0020e +#define HNS3_PMU_EVT_DLY_RPU_PACKET_NUM 0x1020e +#define HNS3_PMU_EVT_DLY_TPU_TIME 0x0020f +#define HNS3_PMU_EVT_DLY_TPU_PACKET_NUM 0x1020f +#define HNS3_PMU_EVT_DLY_RPE_TIME 0x00210 +#define HNS3_PMU_EVT_DLY_RPE_PACKET_NUM 0x10210 +#define HNS3_PMU_EVT_DLY_TPE_TIME 0x00211 +#define HNS3_PMU_EVT_DLY_TPE_PACKET_NUM 0x10211 +#define HNS3_PMU_EVT_DLY_TPE_PUSH_TIME 0x00212 +#define HNS3_PMU_EVT_DLY_TPE_PUSH_PACKET_NUM 0x10212 +#define HNS3_PMU_EVT_DLY_WR_FBD_TIME 0x00213 +#define HNS3_PMU_EVT_DLY_WR_FBD_PACKET_NUM 0x10213 +#define HNS3_PMU_EVT_DLY_WR_EBD_TIME 0x00214 +#define HNS3_PMU_EVT_DLY_WR_EBD_PACKET_NUM 0x10214 +#define HNS3_PMU_EVT_DLY_RD_FBD_TIME 0x00215 +#define HNS3_PMU_EVT_DLY_RD_FBD_PACKET_NUM 0x10215 +#define HNS3_PMU_EVT_DLY_RD_EBD_TIME 0x00216 +#define HNS3_PMU_EVT_DLY_RD_EBD_PACKET_NUM 0x10216 +#define HNS3_PMU_EVT_DLY_RD_PAY_M0_TIME 0x00217 +#define HNS3_PMU_EVT_DLY_RD_PAY_M0_PACKET_NUM 0x10217 +#define HNS3_PMU_EVT_DLY_RD_PAY_M1_TIME 0x00218 +#define HNS3_PMU_EVT_DLY_RD_PAY_M1_PACKET_NUM 0x10218 +#define HNS3_PMU_EVT_DLY_WR_PAY_M0_TIME 0x00219 +#define HNS3_PMU_EVT_DLY_WR_PAY_M0_PACKET_NUM 0x10219 +#define HNS3_PMU_EVT_DLY_WR_PAY_M1_TIME 0x0021a +#define HNS3_PMU_EVT_DLY_WR_PAY_M1_PACKET_NUM 0x1021a +#define HNS3_PMU_EVT_DLY_MSIX_WRITE_TIME 0x0021c +#define HNS3_PMU_EVT_DLY_MSIX_WRITE_PACKET_NUM 0x1021c + +/* interrupt rate events */ +#define HNS3_PMU_EVT_PPS_MSIX_NIC_INTR_NUM 0x00300 +#define HNS3_PMU_EVT_PPS_MSIX_NIC_TIME 0x10300 + +/* filter mode supported by each bandwidth event */ +#define HNS3_PMU_FILTER_BW_SSU_EGU 0x07 +#define HNS3_PMU_FILTER_BW_SSU_RPU 0x1f +#define HNS3_PMU_FILTER_BW_SSU_ROCE 0x0f +#define HNS3_PMU_FILTER_BW_ROCE_SSU 0x0f +#define HNS3_PMU_FILTER_BW_TPU_SSU 0x1f +#define HNS3_PMU_FILTER_BW_RPU_RCBRX 0x11 +#define HNS3_PMU_FILTER_BW_RCBTX_TXSCH 0x11 +#define HNS3_PMU_FILTER_BW_WR_FBD 0x1b +#define HNS3_PMU_FILTER_BW_WR_EBD 0x11 +#define HNS3_PMU_FILTER_BW_RD_FBD 0x01 +#define HNS3_PMU_FILTER_BW_RD_EBD 0x1b +#define HNS3_PMU_FILTER_BW_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_BW_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_BW_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_BW_WR_PAY_M1 0x01 + +/* filter mode supported by each packet rate event */ +#define HNS3_PMU_FILTER_PPS_IGU_SSU 0x07 +#define HNS3_PMU_FILTER_PPS_SSU_EGU 0x07 +#define HNS3_PMU_FILTER_PPS_SSU_RPU 0x1f +#define HNS3_PMU_FILTER_PPS_SSU_ROCE 0x0f +#define HNS3_PMU_FILTER_PPS_ROCE_SSU 0x0f +#define HNS3_PMU_FILTER_PPS_TPU_SSU 0x1f +#define HNS3_PMU_FILTER_PPS_RPU_RCBRX 0x11 +#define HNS3_PMU_FILTER_PPS_RCBTX_TPU 0x1f +#define HNS3_PMU_FILTER_PPS_RCBTX_TXSCH 0x11 +#define HNS3_PMU_FILTER_PPS_WR_FBD 0x1b +#define HNS3_PMU_FILTER_PPS_WR_EBD 0x11 +#define HNS3_PMU_FILTER_PPS_RD_FBD 0x01 +#define HNS3_PMU_FILTER_PPS_RD_EBD 0x1b +#define HNS3_PMU_FILTER_PPS_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_PPS_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_PPS_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_PPS_WR_PAY_M1 0x01 +#define HNS3_PMU_FILTER_PPS_NICROH_TX_PRE 0x01 +#define HNS3_PMU_FILTER_PPS_NICROH_RX_PRE 0x01 + +/* filter mode supported by each latency event */ +#define HNS3_PMU_FILTER_DLY_TX_PUSH 0x01 +#define HNS3_PMU_FILTER_DLY_TX 0x01 +#define HNS3_PMU_FILTER_DLY_SSU_TX_NIC 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_TX_ROCE 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_RX_NIC 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_RX_ROCE 0x07 +#define HNS3_PMU_FILTER_DLY_RPU 0x11 +#define HNS3_PMU_FILTER_DLY_TPU 0x1f +#define HNS3_PMU_FILTER_DLY_RPE 0x01 +#define HNS3_PMU_FILTER_DLY_TPE 0x0b +#define HNS3_PMU_FILTER_DLY_TPE_PUSH 0x1b +#define HNS3_PMU_FILTER_DLY_WR_FBD 0x1b +#define HNS3_PMU_FILTER_DLY_WR_EBD 0x11 +#define HNS3_PMU_FILTER_DLY_RD_FBD 0x01 +#define HNS3_PMU_FILTER_DLY_RD_EBD 0x1b +#define HNS3_PMU_FILTER_DLY_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_DLY_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_DLY_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_DLY_WR_PAY_M1 0x01 +#define HNS3_PMU_FILTER_DLY_MSIX_WRITE 0x01 + +/* filter mode supported by each interrupt rate event */ +#define HNS3_PMU_FILTER_INTR_MSIX_NIC 0x01 + +enum hns3_pmu_hw_filter_mode { + HNS3_PMU_HW_FILTER_GLOBAL, + HNS3_PMU_HW_FILTER_PORT, + HNS3_PMU_HW_FILTER_PORT_TC, + HNS3_PMU_HW_FILTER_FUNC, + HNS3_PMU_HW_FILTER_FUNC_QUEUE, + HNS3_PMU_HW_FILTER_FUNC_INTR, +}; + +struct hns3_pmu_event_attr { + u32 event; + u16 filter_support; +}; + +struct hns3_pmu { + struct perf_event *hw_events[HNS3_PMU_MAX_HW_EVENTS]; + struct hlist_node node; + struct pci_dev *pdev; + struct pmu pmu; + void __iomem *base; + int irq; + int on_cpu; + u32 identifier; + u32 hw_clk_freq; /* hardware clock frequency of PMU */ + /* maximum and minimum bdf allowed by PMU */ + u16 bdf_min; + u16 bdf_max; +}; + +#define to_hns3_pmu(p) (container_of((p), struct hns3_pmu, pmu)) + +#define GET_PCI_DEVFN(bdf) ((bdf) & 0xff) + +#define FILTER_CONDITION_PORT(port) ((1 << (port)) & 0xff) +#define FILTER_CONDITION_PORT_TC(port, tc) (((port) << 3) | ((tc) & 0x07)) +#define FILTER_CONDITION_FUNC_INTR(func, intr) (((intr) << 8) | (func)) + +#define HNS3_PMU_FILTER_ATTR(_name, _config, _start, _end) \ + static inline u64 hns3_pmu_get_##_name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(_end, _start), \ + event->attr._config); \ + } + +HNS3_PMU_FILTER_ATTR(subevent, config, 0, 7); +HNS3_PMU_FILTER_ATTR(event_type, config, 8, 15); +HNS3_PMU_FILTER_ATTR(ext_counter_used, config, 16, 16); +HNS3_PMU_FILTER_ATTR(port, config1, 0, 3); +HNS3_PMU_FILTER_ATTR(tc, config1, 4, 7); +HNS3_PMU_FILTER_ATTR(bdf, config1, 8, 23); +HNS3_PMU_FILTER_ATTR(queue, config1, 24, 39); +HNS3_PMU_FILTER_ATTR(intr, config1, 40, 51); +HNS3_PMU_FILTER_ATTR(global, config1, 52, 52); + +#define HNS3_BW_EVT_BYTE_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_BW_##_name##_BYTE_NUM, \ + HNS3_PMU_FILTER_BW_##_name}) +#define HNS3_BW_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_BW_##_name##_TIME, \ + HNS3_PMU_FILTER_BW_##_name}) +#define HNS3_PPS_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_PACKET_NUM, \ + HNS3_PMU_FILTER_PPS_##_name}) +#define HNS3_PPS_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_TIME, \ + HNS3_PMU_FILTER_PPS_##_name}) +#define HNS3_DLY_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_DLY_##_name##_TIME, \ + HNS3_PMU_FILTER_DLY_##_name}) +#define HNS3_DLY_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_DLY_##_name##_PACKET_NUM, \ + HNS3_PMU_FILTER_DLY_##_name}) +#define HNS3_INTR_EVT_INTR_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_INTR_NUM, \ + HNS3_PMU_FILTER_INTR_##_name}) +#define HNS3_INTR_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_TIME, \ + HNS3_PMU_FILTER_INTR_##_name}) + +static ssize_t hns3_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} + +static ssize_t hns3_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu_event_attr *event; + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + return sysfs_emit(buf, "config=0x%x\n", event->event); +} + +static ssize_t hns3_pmu_filter_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu_event_attr *event; + struct dev_ext_attribute *eattr; + int len; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + len = sysfs_emit_at(buf, 0, "filter mode supported: "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL) + len += sysfs_emit_at(buf, len, "global "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT) + len += sysfs_emit_at(buf, len, "port "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC) + len += sysfs_emit_at(buf, len, "port-tc "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC) + len += sysfs_emit_at(buf, len, "func "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE) + len += sysfs_emit_at(buf, len, "func-queue "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR) + len += sysfs_emit_at(buf, len, "func-intr "); + + len += sysfs_emit_at(buf, len, "\n"); + + return len; +} + +#define HNS3_PMU_ATTR(_name, _func, _config) \ + (&((struct dev_ext_attribute[]) { \ + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \ + })[0].attr.attr) + +#define HNS3_PMU_FORMAT_ATTR(_name, _format) \ + HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format) +#define HNS3_PMU_EVENT_ATTR(_name, _event) \ + HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event) +#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \ + HNS3_PMU_ATTR(_name, hns3_pmu_filter_mode_show, (void *)_event) + +#define HNS3_PMU_BW_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro)) +#define HNS3_PMU_PPS_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro)) +#define HNS3_PMU_DLY_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro)) +#define HNS3_PMU_INTR_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro)) + +#define HNS3_PMU_BW_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro)) +#define HNS3_PMU_PPS_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro)) +#define HNS3_PMU_DLY_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro)) +#define HNS3_PMU_INTR_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro)) + +static u8 hns3_pmu_hw_filter_modes[] = { + HNS3_PMU_HW_FILTER_GLOBAL, + HNS3_PMU_HW_FILTER_PORT, + HNS3_PMU_HW_FILTER_PORT_TC, + HNS3_PMU_HW_FILTER_FUNC, + HNS3_PMU_HW_FILTER_FUNC_QUEUE, + HNS3_PMU_HW_FILTER_FUNC_INTR, +}; + +#define HNS3_PMU_SET_HW_FILTER(_hwc, _mode) \ + ((_hwc)->addr_filters = (void *)&hns3_pmu_hw_filter_modes[(_mode)]) + +static ssize_t identifier_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "0x%x\n", hns3_pmu->identifier); +} +static DEVICE_ATTR_RO(identifier); + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%d\n", hns3_pmu->on_cpu); +} +static DEVICE_ATTR_RO(cpumask); + +static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + u16 bdf = hns3_pmu->bdf_min; + + return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf), + PCI_SLOT(bdf), PCI_FUNC(bdf)); +} +static DEVICE_ATTR_RO(bdf_min); + +static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + u16 bdf = hns3_pmu->bdf_max; + + return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf), + PCI_SLOT(bdf), PCI_FUNC(bdf)); +} +static DEVICE_ATTR_RO(bdf_max); + +static ssize_t hw_clk_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%u\n", hns3_pmu->hw_clk_freq); +} +static DEVICE_ATTR_RO(hw_clk_freq); + +static struct attribute *hns3_pmu_events_attr[] = { + /* bandwidth events */ + HNS3_PMU_BW_EVT_PAIR(bw_ssu_egu, SSU_EGU), + HNS3_PMU_BW_EVT_PAIR(bw_ssu_rpu, SSU_RPU), + HNS3_PMU_BW_EVT_PAIR(bw_ssu_roce, SSU_ROCE), + HNS3_PMU_BW_EVT_PAIR(bw_roce_ssu, ROCE_SSU), + HNS3_PMU_BW_EVT_PAIR(bw_tpu_ssu, TPU_SSU), + HNS3_PMU_BW_EVT_PAIR(bw_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_BW_EVT_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_BW_EVT_PAIR(bw_wr_fbd, WR_FBD), + HNS3_PMU_BW_EVT_PAIR(bw_wr_ebd, WR_EBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_fbd, RD_FBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_ebd, RD_EBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m1, WR_PAY_M1), + + /* packet rate events */ + HNS3_PMU_PPS_EVT_PAIR(pps_igu_ssu, IGU_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_egu, SSU_EGU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_rpu, SSU_RPU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_roce, SSU_ROCE), + HNS3_PMU_PPS_EVT_PAIR(pps_roce_ssu, ROCE_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_tpu_ssu, TPU_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_tpu, RCBTX_TPU), + HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_fbd, WR_FBD), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_ebd, WR_EBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_fbd, RD_FBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_ebd, RD_EBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE), + HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE), + + /* latency events */ + HNS3_PMU_DLY_EVT_PAIR(dly_tx_push_to_mac, TX_PUSH), + HNS3_PMU_DLY_EVT_PAIR(dly_tx_normal_to_mac, TX), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE), + HNS3_PMU_DLY_EVT_PAIR(dly_rpu, RPU), + HNS3_PMU_DLY_EVT_PAIR(dly_tpu, TPU), + HNS3_PMU_DLY_EVT_PAIR(dly_rpe, RPE), + HNS3_PMU_DLY_EVT_PAIR(dly_tpe_normal, TPE), + HNS3_PMU_DLY_EVT_PAIR(dly_tpe_push, TPE_PUSH), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_fbd, WR_FBD), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_ebd, WR_EBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_fbd, RD_FBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_ebd, RD_EBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_DLY_EVT_PAIR(dly_msix_write, MSIX_WRITE), + + /* interrupt rate events */ + HNS3_PMU_INTR_EVT_PAIR(pps_intr_msix_nic, MSIX_NIC), + + NULL +}; + +static struct attribute *hns3_pmu_filter_mode_attr[] = { + /* bandwidth events */ + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_egu, SSU_EGU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_rpu, SSU_RPU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_roce, SSU_ROCE), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_roce_ssu, ROCE_SSU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_tpu_ssu, TPU_SSU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_fbd, WR_FBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_ebd, WR_EBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_fbd, RD_FBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_ebd, RD_EBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m1, WR_PAY_M1), + + /* packet rate events */ + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_igu_ssu, IGU_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_egu, SSU_EGU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_rpu, SSU_RPU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_roce, SSU_ROCE), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_roce_ssu, ROCE_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_tpu_ssu, TPU_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_tpu, RCBTX_TPU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_fbd, WR_FBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_ebd, WR_EBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_fbd, RD_FBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_ebd, RD_EBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE), + + /* latency events */ + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_push_to_mac, TX_PUSH), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_normal_to_mac, TX), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpu, RPU), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpu, TPU), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpe, RPE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_normal, TPE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_push, TPE_PUSH), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_fbd, WR_FBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_ebd, WR_EBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_fbd, RD_FBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_ebd, RD_EBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_msix_write, MSIX_WRITE), + + /* interrupt rate events */ + HNS3_PMU_INTR_FLT_MODE_PAIR(pps_intr_msix_nic, MSIX_NIC), + + NULL +}; + +static struct attribute_group hns3_pmu_events_group = { + .name = "events", + .attrs = hns3_pmu_events_attr, +}; + +static struct attribute_group hns3_pmu_filter_mode_group = { + .name = "filtermode", + .attrs = hns3_pmu_filter_mode_attr, +}; + +static struct attribute *hns3_pmu_format_attr[] = { + HNS3_PMU_FORMAT_ATTR(subevent, "config:0-7"), + HNS3_PMU_FORMAT_ATTR(event_type, "config:8-15"), + HNS3_PMU_FORMAT_ATTR(ext_counter_used, "config:16"), + HNS3_PMU_FORMAT_ATTR(port, "config1:0-3"), + HNS3_PMU_FORMAT_ATTR(tc, "config1:4-7"), + HNS3_PMU_FORMAT_ATTR(bdf, "config1:8-23"), + HNS3_PMU_FORMAT_ATTR(queue, "config1:24-39"), + HNS3_PMU_FORMAT_ATTR(intr, "config1:40-51"), + HNS3_PMU_FORMAT_ATTR(global, "config1:52"), + NULL +}; + +static struct attribute_group hns3_pmu_format_group = { + .name = "format", + .attrs = hns3_pmu_format_attr, +}; + +static struct attribute *hns3_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static struct attribute_group hns3_pmu_cpumask_attr_group = { + .attrs = hns3_pmu_cpumask_attrs, +}; + +static struct attribute *hns3_pmu_identifier_attrs[] = { + &dev_attr_identifier.attr, + NULL +}; + +static struct attribute_group hns3_pmu_identifier_attr_group = { + .attrs = hns3_pmu_identifier_attrs, +}; + +static struct attribute *hns3_pmu_bdf_range_attrs[] = { + &dev_attr_bdf_min.attr, + &dev_attr_bdf_max.attr, + NULL +}; + +static struct attribute_group hns3_pmu_bdf_range_attr_group = { + .attrs = hns3_pmu_bdf_range_attrs, +}; + +static struct attribute *hns3_pmu_hw_clk_freq_attrs[] = { + &dev_attr_hw_clk_freq.attr, + NULL +}; + +static struct attribute_group hns3_pmu_hw_clk_freq_attr_group = { + .attrs = hns3_pmu_hw_clk_freq_attrs, +}; + +static const struct attribute_group *hns3_pmu_attr_groups[] = { + &hns3_pmu_events_group, + &hns3_pmu_filter_mode_group, + &hns3_pmu_format_group, + &hns3_pmu_cpumask_attr_group, + &hns3_pmu_identifier_attr_group, + &hns3_pmu_bdf_range_attr_group, + &hns3_pmu_hw_clk_freq_attr_group, + NULL +}; + +static u32 hns3_pmu_get_event(struct perf_event *event) +{ + return hns3_pmu_get_ext_counter_used(event) << 16 | + hns3_pmu_get_event_type(event) << 8 | + hns3_pmu_get_subevent(event); +} + +static u32 hns3_pmu_get_real_event(struct perf_event *event) +{ + return hns3_pmu_get_event_type(event) << 8 | + hns3_pmu_get_subevent(event); +} + +static u32 hns3_pmu_get_offset(u32 offset, u32 idx) +{ + return offset + HNS3_PMU_REG_EVENT_OFFSET + + HNS3_PMU_REG_EVENT_SIZE * idx; +} + +static u32 hns3_pmu_readl(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + return readl(hns3_pmu->base + offset); +} + +static void hns3_pmu_writel(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx, + u32 val) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + writel(val, hns3_pmu->base + offset); +} + +static u64 hns3_pmu_readq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + return readq(hns3_pmu->base + offset); +} + +static void hns3_pmu_writeq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx, + u64 val) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + writeq(val, hns3_pmu->base + offset); +} + +static bool hns3_pmu_cmp_event(struct perf_event *target, + struct perf_event *event) +{ + return hns3_pmu_get_real_event(target) == hns3_pmu_get_real_event(event); +} + +static int hns3_pmu_find_related_event_idx(struct hns3_pmu *hns3_pmu, + struct perf_event *event) +{ + struct perf_event *sibling; + int hw_event_used = 0; + int idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + sibling = hns3_pmu->hw_events[idx]; + if (!sibling) + continue; + + hw_event_used++; + + if (!hns3_pmu_cmp_event(sibling, event)) + continue; + + /* Related events is used in group */ + if (sibling->group_leader == event->group_leader) + return idx; + } + + /* No related event and all hardware events are used up */ + if (hw_event_used >= HNS3_PMU_MAX_HW_EVENTS) + return -EBUSY; + + /* No related event and there is extra hardware events can be use */ + return -ENOENT; +} + +static int hns3_pmu_get_event_idx(struct hns3_pmu *hns3_pmu) +{ + int idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + if (!hns3_pmu->hw_events[idx]) + return idx; + } + + return -EBUSY; +} + +static bool hns3_pmu_valid_bdf(struct hns3_pmu *hns3_pmu, u16 bdf) +{ + struct pci_dev *pdev; + + if (bdf < hns3_pmu->bdf_min || bdf > hns3_pmu->bdf_max) { + pci_err(hns3_pmu->pdev, "Invalid EP device: %#x!\n", bdf); + return false; + } + + pdev = pci_get_domain_bus_and_slot(pci_domain_nr(hns3_pmu->pdev->bus), + PCI_BUS_NUM(bdf), + GET_PCI_DEVFN(bdf)); + if (!pdev) { + pci_err(hns3_pmu->pdev, "Nonexistent EP device: %#x!\n", bdf); + return false; + } + + pci_dev_put(pdev); + return true; +} + +static void hns3_pmu_set_qid_para(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf, + u16 queue) +{ + u32 val; + + val = GET_PCI_DEVFN(bdf); + val |= (u32)queue << HNS3_PMU_QID_PARA_QUEUE_S; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_PARA, idx, val); +} + +static bool hns3_pmu_qid_req_start(struct hns3_pmu *hns3_pmu, u32 idx) +{ + bool queue_id_valid = false; + u32 reg_qid_ctrl, val; + int err; + + /* enable queue id request */ + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, + HNS3_PMU_QID_CTRL_REQ_ENABLE); + + reg_qid_ctrl = hns3_pmu_get_offset(HNS3_PMU_REG_EVENT_QID_CTRL, idx); + err = readl_poll_timeout(hns3_pmu->base + reg_qid_ctrl, val, + val & HNS3_PMU_QID_CTRL_DONE, 1, 1000); + if (err == -ETIMEDOUT) { + pci_err(hns3_pmu->pdev, "QID request timeout!\n"); + goto out; + } + + queue_id_valid = !(val & HNS3_PMU_QID_CTRL_MISS); + +out: + /* disable qid request and clear status */ + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, 0); + + return queue_id_valid; +} + +static bool hns3_pmu_valid_queue(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf, + u16 queue) +{ + hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue); + + return hns3_pmu_qid_req_start(hns3_pmu, idx); +} + +static struct hns3_pmu_event_attr *hns3_pmu_get_pmu_event(u32 event) +{ + struct hns3_pmu_event_attr *pmu_event; + struct dev_ext_attribute *eattr; + struct device_attribute *dattr; + struct attribute *attr; + u32 i; + + for (i = 0; i < ARRAY_SIZE(hns3_pmu_events_attr) - 1; i++) { + attr = hns3_pmu_events_attr[i]; + dattr = container_of(attr, struct device_attribute, attr); + eattr = container_of(dattr, struct dev_ext_attribute, attr); + pmu_event = eattr->var; + + if (event == pmu_event->event) + return pmu_event; + } + + return NULL; +} + +static int hns3_pmu_set_func_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + struct hw_perf_event *hwc = &event->hw; + u16 bdf = hns3_pmu_get_bdf(event); + + if (!hns3_pmu_valid_bdf(hns3_pmu, bdf)) + return -ENOENT; + + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC); + + return 0; +} + +static int hns3_pmu_set_func_queue_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + u16 queue_id = hns3_pmu_get_queue(event); + struct hw_perf_event *hwc = &event->hw; + u16 bdf = hns3_pmu_get_bdf(event); + + if (!hns3_pmu_valid_bdf(hns3_pmu, bdf)) + return -ENOENT; + + if (!hns3_pmu_valid_queue(hns3_pmu, hwc->idx, bdf, queue_id)) { + pci_err(hns3_pmu->pdev, "Invalid queue: %u\n", queue_id); + return -ENOENT; + } + + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_QUEUE); + + return 0; +} + +static bool +hns3_pmu_is_enabled_global_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 global = hns3_pmu_get_global(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL)) + return false; + + return global; +} + +static bool hns3_pmu_is_enabled_func_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 queue_id = hns3_pmu_get_queue(event); + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC)) + return false; + else if (queue_id != HNS3_PMU_FILTER_ALL_QUEUE) + return false; + + return bdf; +} + +static bool +hns3_pmu_is_enabled_func_queue_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 queue_id = hns3_pmu_get_queue(event); + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE)) + return false; + else if (queue_id == HNS3_PMU_FILTER_ALL_QUEUE) + return false; + + return bdf; +} + +static bool hns3_pmu_is_enabled_port_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 tc_id = hns3_pmu_get_tc(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT)) + return false; + + return tc_id == HNS3_PMU_FILTER_ALL_TC; +} + +static bool +hns3_pmu_is_enabled_port_tc_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 tc_id = hns3_pmu_get_tc(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC)) + return false; + + return tc_id != HNS3_PMU_FILTER_ALL_TC; +} + +static bool +hns3_pmu_is_enabled_func_intr_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR)) + return false; + + return hns3_pmu_valid_bdf(hns3_pmu, bdf); +} + +static int hns3_pmu_select_filter_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + u32 event_id = hns3_pmu_get_event(event); + struct hw_perf_event *hwc = &event->hw; + struct hns3_pmu_event_attr *pmu_event; + + pmu_event = hns3_pmu_get_pmu_event(event_id); + if (!pmu_event) { + pci_err(hns3_pmu->pdev, "Invalid pmu event\n"); + return -ENOENT; + } + + if (hns3_pmu_is_enabled_global_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_GLOBAL); + return 0; + } + + if (hns3_pmu_is_enabled_func_mode(event, pmu_event)) + return hns3_pmu_set_func_mode(event, hns3_pmu); + + if (hns3_pmu_is_enabled_func_queue_mode(event, pmu_event)) + return hns3_pmu_set_func_queue_mode(event, hns3_pmu); + + if (hns3_pmu_is_enabled_port_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT); + return 0; + } + + if (hns3_pmu_is_enabled_port_tc_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT_TC); + return 0; + } + + if (hns3_pmu_is_enabled_func_intr_mode(event, hns3_pmu, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_INTR); + return 0; + } + + return -ENOENT; +} + +static bool hns3_pmu_validate_event_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct perf_event *event_group[HNS3_PMU_MAX_HW_EVENTS]; + int counters = 1; + int num; + + event_group[0] = leader; + if (!is_software_event(leader)) { + if (leader->pmu != event->pmu) + return false; + + if (leader != event && !hns3_pmu_cmp_event(leader, event)) + event_group[counters++] = event; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (is_software_event(sibling)) + continue; + + if (sibling->pmu != event->pmu) + return false; + + for (num = 0; num < counters; num++) { + if (hns3_pmu_cmp_event(event_group[num], sibling)) + break; + } + + if (num == counters) + event_group[counters++] = sibling; + } + + return counters <= HNS3_PMU_MAX_HW_EVENTS; +} + +static u32 hns3_pmu_get_filter_condition(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u16 intr_id = hns3_pmu_get_intr(event); + u8 port_id = hns3_pmu_get_port(event); + u16 bdf = hns3_pmu_get_bdf(event); + u8 tc_id = hns3_pmu_get_tc(event); + u8 filter_mode; + + filter_mode = *(u8 *)hwc->addr_filters; + switch (filter_mode) { + case HNS3_PMU_HW_FILTER_PORT: + return FILTER_CONDITION_PORT(port_id); + case HNS3_PMU_HW_FILTER_PORT_TC: + return FILTER_CONDITION_PORT_TC(port_id, tc_id); + case HNS3_PMU_HW_FILTER_FUNC: + case HNS3_PMU_HW_FILTER_FUNC_QUEUE: + return GET_PCI_DEVFN(bdf); + case HNS3_PMU_HW_FILTER_FUNC_INTR: + return FILTER_CONDITION_FUNC_INTR(GET_PCI_DEVFN(bdf), intr_id); + default: + break; + } + + return 0; +} + +static void hns3_pmu_config_filter(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + u8 event_type = hns3_pmu_get_event_type(event); + u8 subevent_id = hns3_pmu_get_subevent(event); + u16 queue_id = hns3_pmu_get_queue(event); + struct hw_perf_event *hwc = &event->hw; + u8 filter_mode = *(u8 *)hwc->addr_filters; + u16 bdf = hns3_pmu_get_bdf(event); + u32 idx = hwc->idx; + u32 val; + + val = event_type; + val |= subevent_id << HNS3_PMU_CTRL_SUBEVENT_S; + val |= filter_mode << HNS3_PMU_CTRL_FILTER_MODE_S; + val |= HNS3_PMU_EVENT_OVERFLOW_RESTART; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); + + val = hns3_pmu_get_filter_condition(event); + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_HIGH, idx, val); + + if (filter_mode == HNS3_PMU_HW_FILTER_FUNC_QUEUE) + hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue_id); +} + +static void hns3_pmu_enable_counter(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val |= HNS3_PMU_EVENT_EN; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static void hns3_pmu_disable_counter(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val &= ~HNS3_PMU_EVENT_EN; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static void hns3_pmu_enable_intr(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx); + val &= ~HNS3_PMU_INTR_MASK_OVERFLOW; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val); +} + +static void hns3_pmu_disable_intr(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx); + val |= HNS3_PMU_INTR_MASK_OVERFLOW; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val); +} + +static void hns3_pmu_clear_intr_status(struct hns3_pmu *hns3_pmu, u32 idx) +{ + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val |= HNS3_PMU_EVENT_STATUS_RESET; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val &= ~HNS3_PMU_EVENT_STATUS_RESET; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static u64 hns3_pmu_read_counter(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + + return hns3_pmu_readq(hns3_pmu, event->hw.event_base, event->hw.idx); +} + +static void hns3_pmu_write_counter(struct perf_event *event, u64 value) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + u32 idx = event->hw.idx; + + hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_COUNTER, idx, value); + hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_EXT_COUNTER, idx, value); +} + +static void hns3_pmu_init_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + local64_set(&hwc->prev_count, 0); + hns3_pmu_write_counter(event, 0); +} + +static int hns3_pmu_event_init(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + int ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling is not supported */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + event->cpu = hns3_pmu->on_cpu; + + idx = hns3_pmu_get_event_idx(hns3_pmu); + if (idx < 0) { + pci_err(hns3_pmu->pdev, "Up to %u events are supported!\n", + HNS3_PMU_MAX_HW_EVENTS); + return -EBUSY; + } + + hwc->idx = idx; + + ret = hns3_pmu_select_filter_mode(event, hns3_pmu); + if (ret) { + pci_err(hns3_pmu->pdev, "Invalid filter, ret = %d.\n", ret); + return ret; + } + + if (!hns3_pmu_validate_event_group(event)) { + pci_err(hns3_pmu->pdev, "Invalid event group.\n"); + return -EINVAL; + } + + if (hns3_pmu_get_ext_counter_used(event)) + hwc->event_base = HNS3_PMU_REG_EVENT_EXT_COUNTER; + else + hwc->event_base = HNS3_PMU_REG_EVENT_COUNTER; + + return 0; +} + +static void hns3_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 new_cnt, prev_cnt, delta; + + do { + prev_cnt = local64_read(&hwc->prev_count); + new_cnt = hns3_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != + prev_cnt); + + delta = new_cnt - prev_cnt; + local64_add(delta, &event->count); +} + +static void hns3_pmu_start(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + hns3_pmu_config_filter(event); + hns3_pmu_init_counter(event); + hns3_pmu_enable_intr(hns3_pmu, hwc); + hns3_pmu_enable_counter(hns3_pmu, hwc); + + perf_event_update_userpage(event); +} + +static void hns3_pmu_stop(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hns3_pmu_disable_counter(hns3_pmu, hwc); + hns3_pmu_disable_intr(hns3_pmu, hwc); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + /* Read hardware counter and update the perf counter statistics */ + hns3_pmu_read(event); + hwc->state |= PERF_HES_UPTODATE; +} + +static int hns3_pmu_add(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + /* Check all working events to find a related event. */ + idx = hns3_pmu_find_related_event_idx(hns3_pmu, event); + if (idx < 0 && idx != -ENOENT) + return idx; + + /* Current event shares an enabled hardware event with related event */ + if (idx >= 0 && idx < HNS3_PMU_MAX_HW_EVENTS) { + hwc->idx = idx; + goto start_count; + } + + idx = hns3_pmu_get_event_idx(hns3_pmu); + if (idx < 0) + return idx; + + hwc->idx = idx; + hns3_pmu->hw_events[idx] = event; + +start_count: + if (flags & PERF_EF_START) + hns3_pmu_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void hns3_pmu_del(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hns3_pmu_stop(event, PERF_EF_UPDATE); + hns3_pmu->hw_events[hwc->idx] = NULL; + perf_event_update_userpage(event); +} + +static void hns3_pmu_enable(struct pmu *pmu) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu); + u32 val; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); + val |= HNS3_PMU_GLOBAL_START; + writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); +} + +static void hns3_pmu_disable(struct pmu *pmu) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu); + u32 val; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); + val &= ~HNS3_PMU_GLOBAL_START; + writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); +} + +static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) +{ + u16 device_id; + char *name; + u32 val; + + hns3_pmu->base = pcim_iomap_table(pdev)[BAR_2]; + if (!hns3_pmu->base) { + pci_err(pdev, "ioremap failed\n"); + return -ENOMEM; + } + + hns3_pmu->hw_clk_freq = readl(hns3_pmu->base + HNS3_PMU_REG_CLOCK_FREQ); + + val = readl(hns3_pmu->base + HNS3_PMU_REG_BDF); + hns3_pmu->bdf_min = val & 0xffff; + hns3_pmu->bdf_max = val >> 16; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_DEVICE_ID); + device_id = val & 0xffff; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hns3_pmu_sicl_%u", device_id); + if (!name) + return -ENOMEM; + + hns3_pmu->pdev = pdev; + hns3_pmu->on_cpu = -1; + hns3_pmu->identifier = readl(hns3_pmu->base + HNS3_PMU_REG_VERSION); + hns3_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .event_init = hns3_pmu_event_init, + .pmu_enable = hns3_pmu_enable, + .pmu_disable = hns3_pmu_disable, + .add = hns3_pmu_add, + .del = hns3_pmu_del, + .start = hns3_pmu_start, + .stop = hns3_pmu_stop, + .read = hns3_pmu_read, + .task_ctx_nr = perf_invalid_context, + .attr_groups = hns3_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + return 0; +} + +static irqreturn_t hns3_pmu_irq(int irq, void *data) +{ + struct hns3_pmu *hns3_pmu = data; + u32 intr_status, idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + intr_status = hns3_pmu_readl(hns3_pmu, + HNS3_PMU_REG_EVENT_INTR_STATUS, + idx); + + /* + * As each counter will restart from 0 when it is overflowed, + * extra processing is no need, just clear interrupt status. + */ + if (intr_status) + hns3_pmu_clear_intr_status(hns3_pmu, idx); + } + + return IRQ_HANDLED; +} + +static int hns3_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hns3_pmu *hns3_pmu; + + hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node); + if (!hns3_pmu) + return -ENODEV; + + if (hns3_pmu->on_cpu == -1) { + hns3_pmu->on_cpu = cpu; + irq_set_affinity(hns3_pmu->irq, cpumask_of(cpu)); + } + + return 0; +} + +static int hns3_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hns3_pmu *hns3_pmu; + unsigned int target; + + hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node); + if (!hns3_pmu) + return -ENODEV; + + /* Nothing to do if this CPU doesn't own the PMU */ + if (hns3_pmu->on_cpu != cpu) + return 0; + + /* Choose a new CPU from all online cpus */ + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&hns3_pmu->pmu, cpu, target); + hns3_pmu->on_cpu = target; + irq_set_affinity(hns3_pmu->irq, cpumask_of(target)); + + return 0; +} + +static void hns3_pmu_free_irq(void *data) +{ + struct pci_dev *pdev = data; + + pci_free_irq_vectors(pdev); +} + +static int hns3_pmu_irq_register(struct pci_dev *pdev, + struct hns3_pmu *hns3_pmu) +{ + int irq, ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (ret < 0) { + pci_err(pdev, "failed to enable MSI vectors, ret = %d.\n", ret); + return ret; + } + + ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev); + if (ret) { + pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret); + return ret; + } + + irq = pci_irq_vector(pdev, 0); + ret = devm_request_irq(&pdev->dev, irq, hns3_pmu_irq, 0, + hns3_pmu->pmu.name, hns3_pmu); + if (ret) { + pci_err(pdev, "failed to register irq, ret = %d.\n", ret); + return ret; + } + + hns3_pmu->irq = irq; + + return 0; +} + +static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) +{ + int ret; + + ret = hns3_pmu_alloc_pmu(pdev, hns3_pmu); + if (ret) + return ret; + + ret = hns3_pmu_irq_register(pdev, hns3_pmu); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); + if (ret) { + pci_err(pdev, "failed to register hotplug, ret = %d.\n", ret); + return ret; + } + + ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1); + if (ret) { + pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); + } + + return ret; +} + +static void hns3_pmu_uninit_pmu(struct pci_dev *pdev) +{ + struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev); + + perf_pmu_unregister(&hns3_pmu->pmu); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); +} + +static int hns3_pmu_init_dev(struct pci_dev *pdev) +{ + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + pci_err(pdev, "failed to enable pci device, ret = %d.\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(BAR_2), "hns3_pmu"); + if (ret < 0) { + pci_err(pdev, "failed to request pci region, ret = %d.\n", ret); + return ret; + } + + pci_set_master(pdev); + + return 0; +} + +static int hns3_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct hns3_pmu *hns3_pmu; + int ret; + + hns3_pmu = devm_kzalloc(&pdev->dev, sizeof(*hns3_pmu), GFP_KERNEL); + if (!hns3_pmu) + return -ENOMEM; + + ret = hns3_pmu_init_dev(pdev); + if (ret) + return ret; + + ret = hns3_pmu_init_pmu(pdev, hns3_pmu); + if (ret) { + pci_clear_master(pdev); + return ret; + } + + pci_set_drvdata(pdev, hns3_pmu); + + return ret; +} + +static void hns3_pmu_remove(struct pci_dev *pdev) +{ + hns3_pmu_uninit_pmu(pdev); + pci_clear_master(pdev); + pci_set_drvdata(pdev, NULL); +} + +static const struct pci_device_id hns3_pmu_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa22b) }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, hns3_pmu_ids); + +static struct pci_driver hns3_pmu_driver = { + .name = "hns3_pmu", + .id_table = hns3_pmu_ids, + .probe = hns3_pmu_probe, + .remove = hns3_pmu_remove, +}; + +static int __init hns3_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + "AP_PERF_ARM_HNS3_PMU_ONLINE", + hns3_pmu_online_cpu, + hns3_pmu_offline_cpu); + if (ret) { + pr_err("failed to setup HNS3 PMU hotplug, ret = %d.\n", ret); + return ret; + } + + ret = pci_register_driver(&hns3_pmu_driver); + if (ret) { + pr_err("failed to register pci driver, ret = %d.\n", ret); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE); + } + + return ret; +} +module_init(hns3_pmu_module_init); + +static void __exit hns3_pmu_module_exit(void) +{ + pci_unregister_driver(&hns3_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE); +} +module_exit(hns3_pmu_module_exit); + +MODULE_DESCRIPTION("HNS3 PMU driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 19f0dbfdd7fe..3e99fb4d3134 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -230,6 +230,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, + CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, -- cgit v1.2.3 From 70c248aca9e7efa85a6664d5ab56c17c326c958f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 10 Jun 2022 16:21:39 +0100 Subject: mm: kasan: Skip unpoisoning of user pages Commit c275c5c6d50a ("kasan: disable freed user page poisoning with HW tags") added __GFP_SKIP_KASAN_POISON to GFP_HIGHUSER_MOVABLE. A similar argument can be made about unpoisoning, so also add __GFP_SKIP_KASAN_UNPOISON to user pages. To ensure the user page is still accessible via page_address() without a kasan fault, reset the page->flags tag. With the above changes, there is no need for the arm64 tag_clear_highpage() to reset the page->flags tag. Signed-off-by: Catalin Marinas Cc: Andrey Ryabinin Cc: Andrey Konovalov Cc: Peter Collingbourne Cc: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20220610152141.2148929-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 1 - include/linux/gfp.h | 2 +- mm/page_alloc.c | 7 +++++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c5e11768e5c1..cdf3ffa0c223 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { mte_zero_clear_page_tags(page_address(page)); - page_kasan_tag_reset(page); set_bit(PG_mte_tagged, &page->flags); } diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2d2ccae933c2..0ace7759acd2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -348,7 +348,7 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ - __GFP_SKIP_KASAN_POISON) + __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON) #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e008a3df0485..f6ed240870bc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2397,6 +2397,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && !should_skip_init(gfp_flags); bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS); + int i; set_page_private(page, 0); set_page_refcounted(page); @@ -2422,8 +2423,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order, * should be initialized as well). */ if (init_tags) { - int i; - /* Initialize both memory and tags. */ for (i = 0; i != 1 << order; ++i) tag_clear_highpage(page + i); @@ -2438,6 +2437,10 @@ inline void post_alloc_hook(struct page *page, unsigned int order, /* Note that memory is already initialized by KASAN. */ if (kasan_has_integrated_init()) init = false; + } else { + /* Ensure page_address() dereferencing does not fault. */ + for (i = 0; i != 1 << order; ++i) + page_kasan_tag_reset(page + i); } /* If memory is still not initialized, do it now. */ if (init) -- cgit v1.2.3 From d0637c505f8a1d8c4088642f1f3e9e3b22da14f6 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 20 Jul 2022 21:37:37 +1200 Subject: arm64: enable THP_SWAP for arm64 THP_SWAP has been proven to improve the swap throughput significantly on x86_64 according to commit bd4c82c22c367e ("mm, THP, swap: delay splitting THP after swapped out"). As long as arm64 uses 4K page size, it is quite similar with x86_64 by having 2MB PMD THP. THP_SWAP is architecture-independent, thus, enabling it on arm64 will benefit arm64 as well. A corner case is that MTE has an assumption that only base pages can be swapped. We won't enable THP_SWAP for ARM64 hardware with MTE support until MTE is reworked to coexist with THP_SWAP. A micro-benchmark is written to measure thp swapout throughput as below, unsigned long long tv_to_ms(struct timeval tv) { return tv.tv_sec * 1000 + tv.tv_usec / 1000; } main() { struct timeval tv_b, tv_e;; #define SIZE 400*1024*1024 volatile void *p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (!p) { perror("fail to get memory"); exit(-1); } madvise(p, SIZE, MADV_HUGEPAGE); memset(p, 0x11, SIZE); /* write to get mem */ gettimeofday(&tv_b, NULL); madvise(p, SIZE, MADV_PAGEOUT); gettimeofday(&tv_e, NULL); printf("swp out bandwidth: %ld bytes/ms\n", SIZE/(tv_to_ms(tv_e) - tv_to_ms(tv_b))); } Testing is done on rk3568 64bit Quad Core Cortex-A55 platform - ROCK 3A. thp swp throughput w/o patch: 2734bytes/ms (mean of 10 tests) thp swp throughput w/ patch: 3331bytes/ms (mean of 10 tests) Cc: "Huang, Ying" Cc: Minchan Kim Cc: Johannes Weiner Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Steven Price Cc: Yang Shi Reviewed-by: Anshuman Khandual Signed-off-by: Barry Song Link: https://lore.kernel.org/r/20220720093737.133375-1-21cnbao@gmail.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/pgtable.h | 6 ++++++ include/linux/huge_mm.h | 12 ++++++++++++ mm/swap_slots.c | 2 +- 4 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1652a9800ebe..e1c540e80eec 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -101,6 +101,7 @@ config ARM64 select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_NO_INSTR + select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b6632f18364..78d6f6014bfb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -45,6 +45,12 @@ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline bool arch_thp_swp_supported(void) +{ + return !system_supports_mte(); +} +#define arch_thp_swp_supported arch_thp_swp_supported + /* * Outside of a few very special situations (e.g. hibernation), we always * use broadcast TLB invalidation instructions, therefore a spurious page diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index de29821231c9..4ddaf6ad73ef 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio, return split_huge_page_to_list(&folio->page, list); } +/* + * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to + * limitations in the implementation like arm64 MTE can override this to + * false + */ +#ifndef arch_thp_swp_supported +static inline bool arch_thp_swp_supported(void) +{ + return true; +} +#endif + #endif /* _LINUX_HUGE_MM_H */ diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 2a65a89b5b4d..10b94d64cc25 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -307,7 +307,7 @@ swp_entry_t folio_alloc_swap(struct folio *folio) entry.val = 0; if (folio_test_large(folio)) { - if (IS_ENABLED(CONFIG_THP_SWAP)) + if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported()) get_swap_pages(1, &entry, folio_nr_pages(folio)); goto out; } -- cgit v1.2.3