summaryrefslogtreecommitdiffstats
path: root/arch/nds32/kernel/perf_event_cpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/nds32/kernel/perf_event_cpu.c')
-rw-r--r--arch/nds32/kernel/perf_event_cpu.c1500
1 files changed, 0 insertions, 1500 deletions
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
deleted file mode 100644
index a78a879e7ef1..000000000000
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ /dev/null
@@ -1,1500 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2008-2017 Andes Technology Corporation
- *
- * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
- * 2010 (c) MontaVista Software, LLC.
- */
-
-#include <linux/perf_event.h>
-#include <linux/bitmap.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/pm_runtime.h>
-#include <linux/ftrace.h>
-#include <linux/uaccess.h>
-#include <linux/sched/clock.h>
-#include <linux/percpu-defs.h>
-
-#include <asm/pmu.h>
-#include <asm/irq_regs.h>
-#include <asm/nds32.h>
-#include <asm/stacktrace.h>
-#include <asm/perf_event.h>
-#include <nds32_intrinsic.h>
-
-/* Set at runtime when we know what CPU type we are. */
-static struct nds32_pmu *cpu_pmu;
-
-/* One pmu_hw_events instance per CPU; handed out by cpu_pmu_get_cpu_events(). */
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-/* Forward declarations: the IRQ handler calls these before they are defined. */
-static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
-static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
-/* Platform-bus id table referenced by cpu_pmu_driver.id_table. */
-static struct platform_device_id cpu_pmu_plat_device_ids[] = {
- {.name = "nds32-pfm"},
- {},
-};
-
-/*
- * Translate a PERF_TYPE_HW_CACHE config word into a hardware event number.
- *
- * The config packs three 8-bit fields: cache type (bits 0-7), operation
- * (bits 8-15) and result (bits 16-23).  Returns -EINVAL when any field is
- * out of range, -ENOENT when the table marks the combination as
- * CACHE_OP_UNSUPPORTED, otherwise the table entry.
- */
-static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
-				      [PERF_COUNT_HW_CACHE_MAX]
-				      [PERF_COUNT_HW_CACHE_OP_MAX]
-				      [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
-{
-	unsigned int type, op, result, hw_event;
-
-	type = config & 0xff;
-	if (type >= PERF_COUNT_HW_CACHE_MAX)
-		return -EINVAL;
-
-	op = (config >> 8) & 0xff;
-	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
-		return -EINVAL;
-
-	result = (config >> 16) & 0xff;
-	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	hw_event = (*cache_map)[type][op][result];
-
-	return hw_event == CACHE_OP_UNSUPPORTED ? -ENOENT : hw_event;
-}
-
-/*
- * Translate a PERF_TYPE_HARDWARE config into a hardware event number.
- * Returns -ENOENT for an out-of-range config or an entry marked
- * HW_OP_UNSUPPORTED in the table.
- */
-static int
-nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
-		       u64 config)
-{
-	int hw_event;
-
-	if (config >= PERF_COUNT_HW_MAX)
-		return -ENOENT;
-
-	hw_event = (*event_map)[config];
-	if (hw_event == HW_OP_UNSUPPORTED)
-		return -ENOENT;
-
-	return hw_event;
-}
-
-/*
- * Translate a PERF_TYPE_RAW config into the driver's linear event number.
- *
- * The low bits selected by @raw_event_mask carry the per-counter event
- * number; everything above bit 7 selects the counter group (0, 1 or 2).
- * The group selector is kept as a full u64 so that a config with stray
- * high bits is rejected instead of being truncated onto a valid group.
- *
- * Returns the linear event number, or -ENOENT when the group or the event
- * number is outside the range bounded by the SPAV3_<n>_SEL_BASE/LAST
- * markers.
- */
-static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
-	int ev_type = (int)(config & raw_event_mask);
-	u64 idx = config >> 8;
-
-	switch (idx) {
-	case 0:
-		ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
-		if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
-			return -ENOENT;
-		break;
-	case 1:
-		ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
-		if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
-			return -ENOENT;
-		break;
-	case 2:
-		ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
-		if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
-			return -ENOENT;
-		break;
-	default:
-		return -ENOENT;
-	}
-
-	return ev_type;
-}
-
-/*
- * Dispatch on the perf attribute type and map the event's config to a
- * hardware event number through the matching helper.  Any attribute type
- * other than HARDWARE, HW_CACHE or RAW yields -ENOENT.
- */
-int
-nds32_pmu_map_event(struct perf_event *event,
-		    const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
-		    const unsigned int (*cache_map)
-		    [PERF_COUNT_HW_CACHE_MAX]
-		    [PERF_COUNT_HW_CACHE_OP_MAX]
-		    [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
-{
-	const u64 cfg = event->attr.config;
-
-	if (event->attr.type == PERF_TYPE_HARDWARE)
-		return nds32_pmu_map_hw_event(event_map, cfg);
-
-	if (event->attr.type == PERF_TYPE_HW_CACHE)
-		return nds32_pmu_map_cache_event(cache_map, cfg);
-
-	if (event->attr.type == PERF_TYPE_RAW)
-		return nds32_pmu_map_raw_event(raw_event_mask, cfg);
-
-	return -ENOENT;
-}
-
-/*
- * map_event callback installed by device_pmu_init(): maps @event using the
- * nds32_pfm_perf_map / nds32_pfm_perf_cache_map tables and
- * SOFTWARE_EVENT_MASK as the raw-event mask.
- */
-static int nds32_spav3_map_event(struct perf_event *event)
-{
- return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
- &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
-}
-
-/*
- * Read PFM_CTL and write it back with all three overflow bits set, which
- * (per the original comment) clears the overflow status.  Returns the
- * value that was read before the write-back.
- */
-static inline u32 nds32_pfm_getreset_flags(void)
-{
-	const u32 ovf_bits = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-	const u32 ctl = __nds32__mfsr(NDS32_SR_PFM_CTL);
-
-	__nds32__mtsr(ctl | ovf_bits, NDS32_SR_PFM_CTL);
-
-	return ctl;
-}
-
-/* Non-zero when any of the three overflow bits is set in @pfm. */
-static inline int nds32_pfm_has_overflowed(u32 pfm)
-{
-	return pfm & (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-}
-
-/*
- * Non-zero when counter @idx (0..2) has its overflow bit set in @pfm.
- * Any other index is logged and reported as "not overflowed".
- */
-static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
-{
-	if (idx < 0 || idx > 2) {
-		pr_err("%s index wrong\n", __func__);
-		return 0;
-	}
-
-	return pfm & PFM_CTL_OVF[idx];
-}
-
-/*
- * Set the next IRQ period, based on the hwc->period_left value.
- * To be called with the event disabled in hw:
- *
- * Programs the counter with -left (masked to max_period) so that it
- * overflows after "left" more events, and records the same value in
- * hwc->prev_count.
- *
- * Returns 1 when period_left had run out and a fresh period was loaded,
- * 0 otherwise.
- */
-int nds32_pmu_event_set_period(struct perf_event *event)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- s64 left = local64_read(&hwc->period_left);
- s64 period = hwc->sample_period;
- int ret = 0;
-
- /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
- if (unlikely(period != hwc->last_period))
- left = period - (hwc->last_period - left);
-
- /* More than a whole period overdue: restart with a full period. */
- if (unlikely(left <= -period)) {
- left = period;
- local64_set(&hwc->period_left, left);
- hwc->last_period = period;
- ret = 1;
- }
-
- /* Period expired: carry the overshoot into the next period. */
- if (unlikely(left <= 0)) {
- left += period;
- local64_set(&hwc->period_left, left);
- hwc->last_period = period;
- ret = 1;
- }
-
- /* Never program more than the counter can hold. */
- if (left > (s64)nds32_pmu->max_period)
- left = nds32_pmu->max_period;
-
- /*
- * The hw event starts counting from this event offset,
- * mark it to be able to extract future "deltas":
- */
- local64_set(&hwc->prev_count, (u64)(-left));
-
- nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
-
- perf_event_update_userpage(event);
-
- return ret;
-}
-
-/*
- * PMU overflow interrupt handler; @dev is the struct nds32_pmu passed to
- * request_irq().
- *
- * Reads and clears the overflow flags, returns IRQ_NONE if none was set.
- * Otherwise all counters are stopped while every active event whose
- * counter overflowed is updated, re-armed and reported to perf core; an
- * event is disabled when perf_event_overflow() returns non-zero.
- * Counters are restarted before returning IRQ_HANDLED.
- */
-static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
-{
- u32 pfm;
- struct perf_sample_data data;
- struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
- struct pt_regs *regs;
- int idx;
- /*
- * Get and reset the IRQ flags
- */
- pfm = nds32_pfm_getreset_flags();
-
- /*
- * Did an overflow occur?
- */
- if (!nds32_pfm_has_overflowed(pfm))
- return IRQ_NONE;
-
- /*
- * Handle the counter(s) overflow(s)
- */
- regs = get_irq_regs();
-
- nds32_pmu_stop(cpu_pmu);
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
- struct perf_event *event = cpuc->events[idx];
- struct hw_perf_event *hwc;
-
- /* Ignore if we don't have an event. */
- if (!event)
- continue;
-
- /*
- * We have a single interrupt for all counters. Check that
- * each counter has overflowed before we process it.
- */
- if (!nds32_pfm_counter_has_overflowed(pfm, idx))
- continue;
-
- hwc = &event->hw;
- nds32_pmu_event_update(event);
- perf_sample_data_init(&data, 0, hwc->last_period);
- /* Skip the overflow report when no new period was loaded. */
- if (!nds32_pmu_event_set_period(event))
- continue;
-
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
- }
- nds32_pmu_start(cpu_pmu);
- /*
- * Handle the pending perf events.
- *
- * Note: this call *must* be run with interrupts disabled. For
- * platforms that can have the PMU interrupts raised as an NMI, this
- * will not work.
- */
- irq_work_run();
-
- return IRQ_HANDLED;
-}
-
-/* 1 when @idx names one of the PMU's num_events counters, 0 otherwise. */
-static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
-{
-	if (idx < 0)
-		return 0;
-
-	return idx < cpu_pmu->num_events;
-}
-
-/*
- * Clear the enable bit of counter @idx in PFM_CTL.  The overflow bits are
- * written back as zero.  Returns @idx.
- */
-static inline int nds32_pfm_disable_counter(int idx)
-{
-	const u32 en_bit = PFM_CTL_EN[idx];
-	const u32 ovf_bits = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-	unsigned int ctl = __nds32__mfsr(NDS32_SR_PFM_CTL);
-
-	ctl &= ~en_bit;
-	ctl &= ~ovf_bits;
-	__nds32__mtsr_isb(ctl, NDS32_SR_PFM_CTL);
-
-	return idx;
-}
-
-/*
- * Fold the user/kernel exclusion requested in @attr into
- * @event->config_base, using the PFM_CTL_KU / PFM_CTL_KS bits of the
- * counter the event is assigned to.  With no counter assigned yet
- * (idx == -1) nothing is changed.  Always returns 0.
- */
-static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
-				      struct perf_event_attr *attr)
-{
-	const int counter = event->idx;
-	unsigned long filter = 0;
-
-	/* No counter assigned yet: nothing to encode. */
-	if (counter == -1)
-		return 0;
-
-	/* Default is to trace both modes; set a bit per excluded mode. */
-	if (attr->exclude_user)
-		filter |= PFM_CTL_KU[counter];
-
-	if (attr->exclude_kernel)
-		filter |= PFM_CTL_KS[counter];
-
-	/* config_base is later used to build the event selector. */
-	event->config_base |= filter;
-
-	return 0;
-}
-
-/*
- * Program the event selector and mode-exclusion bits of counter @idx in
- * PFM_CTL.
- *
- * @evnum carries the linear event number in its SOFTWARE_EVENT_MASK bits
- * and, optionally, the counter's PFM_CTL_KS / PFM_CTL_KU bits as set up by
- * nds32_pmu_set_event_filter().
- */
-static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
-{
- u32 offset = 0;
- u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
- u32 ev_mask = 0;
- u32 no_kernel_mask = 0;
- u32 no_user_mask = 0;
- u32 val;
-
- offset = PFM_CTL_OFFSEL[idx];
- /* Clear previous mode selection, and write new one */
- no_kernel_mask = PFM_CTL_KS[idx];
- no_user_mask = PFM_CTL_KU[idx];
- ori_val &= ~no_kernel_mask;
- ori_val &= ~no_user_mask;
- if (evnum & no_kernel_mask)
- ori_val |= no_kernel_mask;
-
- if (evnum & no_user_mask)
- ori_val |= no_user_mask;
-
- /* Clear previous event selection */
- ev_mask = PFM_CTL_SEL[idx];
- ori_val &= ~ev_mask;
- evnum &= SOFTWARE_EVENT_MASK;
-
- /* undo the linear mapping */
- evnum = get_converted_evet_hw_num(evnum);
- val = ori_val | (evnum << offset);
- /* Write the overflow bits back as zero. */
- val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
- __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
-}
-
-/*
- * Set the enable bit of counter @idx in PFM_CTL.  The overflow bits are
- * written back as zero.  Returns @idx.
- */
-static inline int nds32_pfm_enable_counter(int idx)
-{
-	const u32 en_bit = PFM_CTL_EN[idx];
-	const u32 ovf_bits = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-	unsigned int ctl = __nds32__mfsr(NDS32_SR_PFM_CTL);
-
-	ctl |= en_bit;
-	ctl &= ~ovf_bits;
-	__nds32__mtsr_isb(ctl, NDS32_SR_PFM_CTL);
-
-	return idx;
-}
-
-/*
- * Set the interrupt-enable bit of counter @idx in PFM_CTL.  The overflow
- * bits are written back as zero.  Returns @idx.
- */
-static inline int nds32_pfm_enable_intens(int idx)
-{
-	const u32 ie_bit = PFM_CTL_IE[idx];
-	const u32 ovf_bits = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-	unsigned int ctl = __nds32__mfsr(NDS32_SR_PFM_CTL);
-
-	ctl |= ie_bit;
-	ctl &= ~ovf_bits;
-	__nds32__mtsr_isb(ctl, NDS32_SR_PFM_CTL);
-
-	return idx;
-}
-
-/*
- * Clear the interrupt-enable bit of counter @idx in PFM_CTL.  The overflow
- * bits are written back as zero.  Returns @idx.
- */
-static inline int nds32_pfm_disable_intens(int idx)
-{
-	const u32 ie_bit = PFM_CTL_IE[idx];
-	const u32 ovf_bits = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-	unsigned int ctl = __nds32__mfsr(NDS32_SR_PFM_CTL);
-
-	ctl &= ~ie_bit;
-	ctl &= ~ovf_bits;
-	__nds32__mtsr_isb(ctl, NDS32_SR_PFM_CTL);
-
-	return idx;
-}
-
-/*
- * 1 when @attr asks for user or kernel mode to be excluded, else 0.
- * Other exclusion modes are not supported on NDS32.
- */
-static int event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
-	if (attr->exclude_user)
-		return 1;
-
-	if (attr->exclude_kernel)
-		return 1;
-
-	return 0;
-}
-
-/*
- * Program and start the hardware counter assigned to @event.
- *
- * Under the per-CPU pmu_lock the counter is disabled, its event selector
- * (including any mode-exclusion bits) is written, and then its interrupt
- * and the counter itself are enabled.  An invalid counter index is logged
- * and otherwise ignored.
- */
-static void nds32_pmu_enable_event(struct perf_event *event)
-{
- unsigned long flags;
- unsigned int evnum = 0;
- struct hw_perf_event *hwc = &event->hw;
- struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
- int idx = hwc->idx;
-
- if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
- pr_err("CPU enabling wrong pfm counter IRQ enable\n");
- return;
- }
-
- /*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
- /*
- * Disable counter
- */
- nds32_pfm_disable_counter(idx);
-
- /*
- * Check whether we need to exclude the counter from certain modes.
- * If exclusion is requested but no filter callback exists (or it
- * fails), the event is programmed with config_base cleared.
- */
- if ((!cpu_pmu->set_event_filter ||
- cpu_pmu->set_event_filter(hwc, &event->attr)) &&
- event_requires_mode_exclusion(&event->attr)) {
- pr_notice
- ("NDS32 performance counters do not support mode exclusion\n");
- hwc->config_base = 0;
- }
- /* Write event */
- evnum = hwc->config_base;
- nds32_pfm_write_evtsel(idx, evnum);
-
- /*
- * Enable interrupt for this counter
- */
- nds32_pfm_enable_intens(idx);
-
- /*
- * Enable counter
- */
- nds32_pfm_enable_counter(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-/*
- * Stop the hardware counter assigned to @event and mask its interrupt,
- * both under the per-CPU pmu_lock.  An invalid counter index is logged and
- * otherwise ignored.
- */
-static void nds32_pmu_disable_event(struct perf_event *event)
-{
- unsigned long flags;
- struct hw_perf_event *hwc = &event->hw;
- struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
- int idx = hwc->idx;
-
- if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
- pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
- return;
- }
-
- /*
- * Disable counter and interrupt
- */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
- /*
- * Disable counter
- */
- nds32_pfm_disable_counter(idx);
-
- /*
- * Disable interrupt for this counter
- */
- nds32_pfm_disable_intens(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-/*
- * Read the hardware counter assigned to @event.  Returns 0 (after logging)
- * when the event's counter index is invalid or has no matching register.
- */
-static inline u32 nds32_pmu_read_counter(struct perf_event *event)
-{
-	struct nds32_pmu *pmu = to_nds32_pmu(event->pmu);
-	int counter = event->hw.idx;
-
-	if (!nds32_pfm_counter_valid(pmu, counter)) {
-		pr_err("CPU reading wrong counter %d\n", counter);
-		return 0;
-	}
-
-	switch (counter) {
-	case PFMC0:
-		return __nds32__mfsr(NDS32_SR_PFMC0);
-	case PFMC1:
-		return __nds32__mfsr(NDS32_SR_PFMC1);
-	case PFMC2:
-		return __nds32__mfsr(NDS32_SR_PFMC2);
-	default:
-		pr_err
-		    ("%s: CPU has no performance counters %d\n",
-		     __func__, counter);
-		return 0;
-	}
-}
-
-/*
- * Write @value into the hardware counter assigned to @event.  An invalid
- * counter index, or one with no matching register, is logged and ignored.
- */
-static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
-{
-	struct nds32_pmu *pmu = to_nds32_pmu(event->pmu);
-	int counter = event->hw.idx;
-
-	if (!nds32_pfm_counter_valid(pmu, counter)) {
-		pr_err("CPU writing wrong counter %d\n", counter);
-		return;
-	}
-
-	switch (counter) {
-	case PFMC0:
-		__nds32__mtsr_isb(value, NDS32_SR_PFMC0);
-		return;
-	case PFMC1:
-		__nds32__mtsr_isb(value, NDS32_SR_PFMC1);
-		return;
-	case PFMC2:
-		__nds32__mtsr_isb(value, NDS32_SR_PFMC2);
-		return;
-	default:
-		pr_err
-		    ("%s: CPU has no performance counters %d\n",
-		     __func__, counter);
-		return;
-	}
-}
-
-/*
- * Claim a free hardware counter for @event in @cpuc->used_mask.
- *
- * The preferred counter comes from get_converted_event_idx(); the cycle
- * and completed-instruction events may fall back to other counters when
- * the preferred one is taken.  Returns the claimed counter index, or
- * -EAGAIN when no suitable counter is free.
- */
-static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int idx;
- struct hw_perf_event *hwc = &event->hw;
- /*
- * Current implementation maps cycles, instruction count and cache-miss
- * to specific counter.
- * However, multiple of the 3 counters are able to count these events.
- *
- *
- * SOFTWARE_EVENT_MASK is the mask for extracting the event number.
- * This is defined by Jia-Rung, you can change the policies.
- * However, do not exceed 8 bits. This is hardware specific.
- * The last number is SPAv3_2_SEL_LAST.
- */
- unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
-
- idx = get_converted_event_idx(evtype);
- /*
- * Try to get the counter for the corresponding event
- */
- if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
- return NDS32_IDX_COUNTER0;
- if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
- return NDS32_IDX_COUNTER1;
- } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
- return NDS32_IDX_COUNTER1;
- else if (!test_and_set_bit
- (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
- return NDS32_IDX_CYCLE_COUNTER;
- } else {
- /* Every other event can only use its preferred counter. */
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- }
- return -EAGAIN;
-}
-
-/*
- * Set the enable bits of all three PFM counters in PFM_CTL, under the
- * per-CPU pmu_lock.  The overflow bits are written back as zero.
- */
-static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
-{
-	struct pmu_hw_events *hw = cpu_pmu->get_hw_events();
-	const u32 en_bits = PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2];
-	const u32 ovf_bits = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-	unsigned long irqflags;
-	unsigned int ctl;
-
-	raw_spin_lock_irqsave(&hw->pmu_lock, irqflags);
-
-	ctl = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	ctl = (ctl | en_bits) & ~ovf_bits;
-	__nds32__mtsr_isb(ctl, NDS32_SR_PFM_CTL);
-
-	raw_spin_unlock_irqrestore(&hw->pmu_lock, irqflags);
-}
-
-/*
- * Clear the enable bits of all three PFM counters in PFM_CTL, under the
- * per-CPU pmu_lock.  The overflow bits are written back as zero.
- */
-static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
-{
-	struct pmu_hw_events *hw = cpu_pmu->get_hw_events();
-	const u32 en_bits = PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2];
-	const u32 ovf_bits = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-	unsigned long irqflags;
-	unsigned int ctl;
-
-	raw_spin_lock_irqsave(&hw->pmu_lock, irqflags);
-
-	ctl = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	ctl &= ~en_bits;
-	ctl &= ~ovf_bits;
-	__nds32__mtsr_isb(ctl, NDS32_SR_PFM_CTL);
-
-	raw_spin_unlock_irqrestore(&hw->pmu_lock, irqflags);
-}
-
-/*
- * Reset callback, run on each CPU from cpu_pmu_init(): writes the three
- * overflow bits to PFM_CTL, then zeroes PFM_CTL and all three counters.
- * @info is unused.
- */
-static void nds32_pmu_reset(void *info)
-{
-	const u32 ovf_bits = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-
-	__nds32__mtsr(ovf_bits, NDS32_SR_PFM_CTL);
-	__nds32__mtsr(0, NDS32_SR_PFM_CTL);
-
-	__nds32__mtsr(0, NDS32_SR_PFMC0);
-	__nds32__mtsr(0, NDS32_SR_PFMC1);
-	__nds32__mtsr(0, NDS32_SR_PFMC2);
-}
-
-/*
- * Install the SPAv3 hardware callbacks and the 32-bit maximum period
- * into @cpu_pmu.
- */
-static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
-{
-	cpu_pmu->handle_irq = nds32_pmu_handle_irq;
-	cpu_pmu->enable = nds32_pmu_enable_event;
-	cpu_pmu->disable = nds32_pmu_disable_event;
-	cpu_pmu->read_counter = nds32_pmu_read_counter;
-	cpu_pmu->write_counter = nds32_pmu_write_counter;
-	cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
-	cpu_pmu->start = nds32_pmu_start;
-	cpu_pmu->stop = nds32_pmu_stop;
-	cpu_pmu->reset = nds32_pmu_reset;
-	cpu_pmu->max_period = 0xFFFFFFFF;	/* Maximum counts */
-}
-
-/* Number of hardware counters; fixed at 3, not read from hardware. */
-static u32 nds32_read_num_pfm_events(void)
-{
- /* NDS32 SPAv3 PMU support 3 counter */
- return 3;
-}
-
-/*
- * Fill @cpu_pmu with the SPAv3 callbacks, name, event mapper, counter
- * count and event filter.  Always returns 0.  Also used as the .data
- * init function of the OF match table.
- */
-static int device_pmu_init(struct nds32_pmu *cpu_pmu)
-{
- nds32_pmu_init(cpu_pmu);
- /*
- * This name should be a device-specific name, whatever you like :)
- * I think "PMU" will be a good generic name.
- */
- cpu_pmu->name = "nds32v3-pmu";
- cpu_pmu->map_event = nds32_spav3_map_event;
- cpu_pmu->num_events = nds32_read_num_pfm_events();
- cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
- return 0;
-}
-
-/*
- * CPU PMU identification and probing.
- *
- * Fallback used by cpu_pmu_device_probe() when the device has no matching
- * OF node.  Initializes @pmu for the current CPU and returns the result of
- * that initialization (0 on success, negative errno on failure).
- */
-static int probe_current_pmu(struct nds32_pmu *pmu)
-{
-	int ret;
-
-	get_cpu();
-	/*
-	 * If there are various CPU types with their own PMU, initialize with
-	 * the corresponding one.  Only one type exists today, so propagate
-	 * its result instead of unconditionally reporting -ENODEV.
-	 */
-	ret = device_pmu_init(pmu);
-	put_cpu();
-	return ret;
-}
-
-/*
- * pmu_enable callback: start the counters, but only when at least one
- * counter is marked as used on this CPU.
- */
-static void nds32_pmu_enable(struct pmu *pmu)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
-	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
-
-	if (!bitmap_weight(hw_events->used_mask, nds32_pmu->num_events))
-		return;
-
-	nds32_pmu->start(nds32_pmu);
-}
-
-/* pmu_disable callback: unconditionally stop the counters. */
-static void nds32_pmu_disable(struct pmu *pmu)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
-
- nds32_pmu->stop(nds32_pmu);
-}
-
-/*
- * Undo nds32_pmu_reserve_hardware(): free the PMU IRQ and drop the
- * runtime-PM reference on the platform device.
- */
-static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
-{
- nds32_pmu->free_irq(nds32_pmu);
- pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
-}
-
-/*
- * IRQ entry point registered with request_irq(): forwards to the PMU's
- * handle_irq callback and reports the time it took to perf core.
- */
-static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
-{
-	struct nds32_pmu *pmu = (struct nds32_pmu *)dev;
-	const u64 t_begin = local_clock();
-	int handled = pmu->handle_irq(irq, dev);
-	const u64 t_end = local_clock();
-
-	perf_sample_event_took(t_end - t_begin);
-
-	return handled;
-}
-
-/*
- * Take a runtime-PM reference on the PMU platform device and request the
- * PMU interrupt.
- *
- * Returns 0 on success, -ENODEV when no platform device is attached, or
- * the error from the request_irq callback.  On failure the runtime-PM
- * reference is dropped again; the IRQ is not freed because it was never
- * successfully requested.
- */
-static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
-{
-	int err;
-	struct platform_device *pmu_device = nds32_pmu->plat_device;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	pm_runtime_get_sync(&pmu_device->dev);
-	err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
-	if (err) {
-		/*
-		 * Only undo what succeeded: calling free_irq() for an IRQ
-		 * that was not requested triggers a kernel warning.
-		 */
-		pm_runtime_put_sync(&pmu_device->dev);
-		return err;
-	}
-
-	return 0;
-}
-
-/*
- * Check whether @event can be scheduled on the (fake) counter set
- * @hw_events alongside the events already claimed there.
- *
- * Returns 1 when the event is acceptable: software events, events that
- * are off (and not enable_on_exec) or in an error state, or events for
- * which a counter could be claimed.  Returns 0 for an event belonging to
- * a different PMU or when no counter is available.
- */
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
- struct perf_event *event)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-
- if (is_software_event(event))
- return 1;
-
- if (event->pmu != pmu)
- return 0;
-
- if (event->state < PERF_EVENT_STATE_OFF)
- return 1;
-
- if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
- return 1;
-
- return nds32_pmu->get_event_idx(hw_events, event) >= 0;
-}
-
-/*
- * Check that @event's whole group (leader, siblings and @event itself)
- * fits on the hardware counters at the same time.
- *
- * The check is a dry run against a scratch pmu_hw_events whose used_mask
- * starts out empty; the real per-CPU state is not touched.
- *
- * Returns 0 when the group fits, -EINVAL otherwise.
- */
-static int validate_group(struct perf_event *event)
-{
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct pmu_hw_events fake_pmu;
-
-	/*
-	 * Initialize the fake PMU. We only need to populate the
-	 * used_mask for the purposes of validation, but it must start
-	 * out cleared or stale stack contents look like busy counters.
-	 */
-	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
-
-	if (!validate_event(event->pmu, &fake_pmu, leader))
-		return -EINVAL;
-
-	for_each_sibling_event(sibling, leader) {
-		if (!validate_event(event->pmu, &fake_pmu, sibling))
-			return -EINVAL;
-	}
-
-	if (!validate_event(event->pmu, &fake_pmu, event))
-		return -EINVAL;
-
-	return 0;
-}
-
-/*
- * Initialize the hardware-specific part of @event.
- *
- * Maps the event to a hardware event number, resets the hw state, stores
- * the mapping in config_base, picks a default period for non-sampling
- * events and validates the group for non-leader events.
- *
- * Returns 0 on success, the negative mapping error, -EOPNOTSUPP when mode
- * exclusion is requested but cannot be applied, or -EINVAL when the group
- * does not fit.
- */
-static int __hw_perf_event_init(struct perf_event *event)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- int mapping;
-
- mapping = nds32_pmu->map_event(event);
-
- if (mapping < 0) {
- pr_debug("event %x:%llx not supported\n", event->attr.type,
- event->attr.config);
- return mapping;
- }
-
- /*
- * We don't assign an index until we actually place the event onto
- * hardware. Use -1 to signify that we haven't decided where to put it
- * yet. For SMP systems, each core has it's own PMU so we can't do any
- * clever allocation or constraints checking at this point.
- */
- hwc->idx = -1;
- hwc->config_base = 0;
- hwc->config = 0;
- hwc->event_base = 0;
-
- /*
- * Check whether we need to exclude the counter from certain modes.
- */
- if ((!nds32_pmu->set_event_filter ||
- nds32_pmu->set_event_filter(hwc, &event->attr)) &&
- event_requires_mode_exclusion(&event->attr)) {
- pr_debug
- ("NDS performance counters do not support mode exclusion\n");
- return -EOPNOTSUPP;
- }
-
- /*
- * Store the event encoding into the config_base field.
- */
- hwc->config_base |= (unsigned long)mapping;
-
- if (!hwc->sample_period) {
- /*
- * For non-sampling runs, limit the sample_period to half
- * of the counter width. That way, the new counter value
- * is far less likely to overtake the previous one unless
- * you have some serious IRQ latency issues.
- */
- hwc->sample_period = nds32_pmu->max_period >> 1;
- hwc->last_period = hwc->sample_period;
- local64_set(&hwc->period_left, hwc->sample_period);
- }
-
- /* Only non-leader events need the whole group re-validated. */
- if (event->group_leader != event) {
- if (validate_group(event) != 0)
- return -EINVAL;
- }
-
- return 0;
-}
-
-/*
- * event_init callback of the perf PMU.
- *
- * Rejects branch-stack sampling and unmappable events, reserves the PMU
- * hardware (runtime PM + IRQ) when the first event is created, bumps the
- * active_events count and then performs the per-event hardware init.
- *
- * NOTE(review): nothing visible in this part of the file decrements
- * active_events or installs event->destroy, so the count and the
- * reserved hardware appear never to be released, including when
- * __hw_perf_event_init() fails - confirm against the rest of the file.
- */
-static int nds32_pmu_event_init(struct perf_event *event)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
- int err = 0;
- atomic_t *active_events = &nds32_pmu->active_events;
-
- /* does not support taken branch sampling */
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
-
- if (nds32_pmu->map_event(event) == -ENOENT)
- return -ENOENT;
-
- /* Fast path: hardware already reserved, just take another count. */
- if (!atomic_inc_not_zero(active_events)) {
- if (atomic_read(active_events) == 0) {
- /* Register irq handler */
- err = nds32_pmu_reserve_hardware(nds32_pmu);
- }
-
- if (!err)
- atomic_inc(active_events);
- }
-
- if (err)
- return err;
-
- err = __hw_perf_event_init(event);
-
- return err;
-}
-
-/*
- * start callback of the perf PMU: clear the stopped state, reprogram the
- * sampling period and enable the event's counter.
- */
-static void nds32_start(struct perf_event *event, int flags)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- /*
- * NDS pmu always has to reprogram the period, so ignore
- * PERF_EF_RELOAD, see the comment below.
- */
- if (flags & PERF_EF_RELOAD)
- WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
- hwc->state = 0;
- /* Set the period for the event. */
- nds32_pmu_event_set_period(event);
-
- nds32_pmu->enable(event);
-}
-
-/*
- * add callback of the perf PMU: claim a hardware counter for @event on
- * this CPU, record the event in the per-CPU table and, when PERF_EF_START
- * is set in @flags, start it.
- *
- * Returns 0 on success or the negative error from get_event_idx when no
- * counter is available.  The PMU is disabled for the duration of the call.
- */
-static int nds32_pmu_add(struct perf_event *event, int flags)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
- struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
- struct hw_perf_event *hwc = &event->hw;
- int idx;
- int err = 0;
-
- perf_pmu_disable(event->pmu);
-
- /* If we don't have a space for the counter then finish early. */
- idx = nds32_pmu->get_event_idx(hw_events, event);
- if (idx < 0) {
- err = idx;
- goto out;
- }
-
- /*
- * If there is an event in the counter we are going to use then make
- * sure it is disabled.
- */
- event->hw.idx = idx;
- nds32_pmu->disable(event);
- hw_events->events[idx] = event;
-
- hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
- if (flags & PERF_EF_START)
- nds32_start(event, PERF_EF_RELOAD);
-
- /* Propagate our changes to the userspace mapping. */
- perf_event_update_userpage(event);
-
-out:
- perf_pmu_enable(event->pmu);
- return err;
-}
-
-/*
- * Fold the events counted since the last update into @event.
- *
- * Reads the hardware counter, publishes it as the new prev_count (retrying
- * if prev_count changed underneath), adds the masked difference to
- * event->count and subtracts it from period_left.
- *
- * Returns the raw counter value that was read.
- */
-u64 nds32_pmu_event_update(struct perf_event *event)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- u64 delta, prev_raw_count, new_raw_count;
-
-again:
- prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = nds32_pmu->read_counter(event);
-
- /* Retry if prev_count was changed between the read and the swap. */
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count) {
- goto again;
- }
- /*
- * Whether overflow or not, "unsigned subtraction"
- * will always get their delta
- */
- delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
-
- local64_add(delta, &event->count);
- local64_sub(delta, &hwc->period_left);
-
- return new_raw_count;
-}
-
-/*
- * stop callback of the perf PMU: if the event is not already stopped,
- * disable its counter, fold in the final count and mark it
- * stopped/up-to-date.
- */
-static void nds32_stop(struct perf_event *event, int flags)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- /*
- * NDS pmu always has to update the counter, so ignore
- * PERF_EF_UPDATE, see comments in nds32_start().
- */
- if (!(hwc->state & PERF_HES_STOPPED)) {
- nds32_pmu->disable(event);
- nds32_pmu_event_update(event);
- hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
- }
-}
-
-/*
- * del callback of the perf PMU: stop @event, remove it from the per-CPU
- * event table and release its hardware counter in used_mask.
- */
-static void nds32_pmu_del(struct perf_event *event, int flags)
-{
- struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
- struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- nds32_stop(event, PERF_EF_UPDATE);
- hw_events->events[idx] = NULL;
- clear_bit(idx, hw_events->used_mask);
-
- perf_event_update_userpage(event);
-}
-
-/* read callback of the perf PMU: refresh event->count from the hardware. */
-static void nds32_pmu_read(struct perf_event *event)
-{
- nds32_pmu_event_update(event);
-}
-
-/* Please refer to SPAv3 for more hardware specific details */
-/* sysfs "format/event": the event encoding occupies config bits 0-63. */
-PMU_FORMAT_ATTR(event, "config:0-63");
-
-static struct attribute *nds32_arch_formats_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group nds32_pmu_format_group = {
- .name = "format",
- .attrs = nds32_arch_formats_attr,
-};
-
-/*
- * Show handler for the read-only "cpus" attribute.  It writes nothing to
- * @buf and returns 0, so the attribute always reads as empty.
- */
-static ssize_t nds32_pmu_cpumask_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return 0;
-}
-
-static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
-
-static struct attribute *nds32_pmu_common_attrs[] = {
- &dev_attr_cpus.attr,
- NULL,
-};
-
-static struct attribute_group nds32_pmu_common_group = {
- .attrs = nds32_pmu_common_attrs,
-};
-
-/* Attribute groups handed to perf core through struct pmu.attr_groups. */
-static const struct attribute_group *nds32_pmu_attr_groups[] = {
- &nds32_pmu_format_group,
- &nds32_pmu_common_group,
- NULL,
-};
-
-/*
- * Reset the active event count and fill in the generic struct pmu
- * callbacks of @nds32_pmu.
- */
-static void nds32_init(struct nds32_pmu *nds32_pmu)
-{
- atomic_set(&nds32_pmu->active_events, 0);
-
- nds32_pmu->pmu = (struct pmu) {
- .pmu_enable = nds32_pmu_enable,
- .pmu_disable = nds32_pmu_disable,
- .attr_groups = nds32_pmu_attr_groups,
- .event_init = nds32_pmu_event_init,
- .add = nds32_pmu_add,
- .del = nds32_pmu_del,
- .start = nds32_start,
- .stop = nds32_stop,
- .read = nds32_pmu_read,
- };
-}
-
-/*
- * Initialize @nds32_pmu, enable runtime PM on its platform device and
- * register it with perf core under its name and the given @type.
- * Returns the result of perf_pmu_register().
- */
-int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
-{
- nds32_init(nds32_pmu);
- pm_runtime_enable(&nds32_pmu->plat_device->dev);
- pr_info("enabled with %s PMU driver, %d counters available\n",
- nds32_pmu->name, nds32_pmu->num_events);
- return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
-}
-
-/* get_hw_events callback: the pmu_hw_events of the CPU we are running on. */
-static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
-{
- return this_cpu_ptr(&cpu_hw_events);
-}
-
-/*
- * request_irq callback: look up the first IRQ of the PMU platform device
- * and install @handler on it, with @cpu_pmu as the dev_id cookie.
- *
- * Returns 0 on success, -ENODEV when there is no platform device or no
- * resource, the negative errno from platform_get_irq() when the IRQ
- * cannot be resolved, or the error from request_irq().
- */
-static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
-{
-	int err, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (irqs < 1) {
-		pr_err("no irqs for PMUs defined\n");
-		return -ENODEV;
-	}
-
-	irq = platform_get_irq(pmu_device, 0);
-	/* A negative value is an errno, not an IRQ number. */
-	if (irq < 0)
-		return irq;
-
-	err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
-			  cpu_pmu);
-	if (err) {
-		pr_err("unable to request IRQ%d for NDS PMU counters\n",
-		       irq);
-		return err;
-	}
-	return 0;
-}
-
-/*
- * free_irq callback: release the PMU interrupt installed by
- * cpu_pmu_request_irq(), if the platform device resolves to a valid IRQ.
- */
-static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
-{
- int irq;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
-
- irq = platform_get_irq(pmu_device, 0);
- if (irq >= 0)
- free_irq(irq, cpu_pmu);
-}
-
-/*
- * Finish setting up @cpu_pmu as the CPU PMU: initialize the pmu_lock of
- * every possible CPU's pmu_hw_events, install the per-CPU accessors and
- * IRQ callbacks, and reset the PMU on each online CPU.
- */
-static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
-{
-	int cpu;
-
-	/* Each CPU has its own event state and therefore its own lock. */
-	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
-
-		raw_spin_lock_init(&events->pmu_lock);
-	}
-
-	cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
-	cpu_pmu->request_irq = cpu_pmu_request_irq;
-	cpu_pmu->free_irq = cpu_pmu_free_irq;
-
-	/* Ensure the PMU has sane values out of reset. */
-	if (cpu_pmu->reset)
-		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
-}
-
-/*
- * Device-tree match table; .data is the init function that
- * cpu_pmu_device_probe() calls for a matching node.
- */
-static const struct of_device_id cpu_pmu_of_device_ids[] = {
- {.compatible = "andestech,nds32v3-pmu",
- .data = device_pmu_init},
- {},
-};
-
-/*
- * Platform driver probe: allocate and initialize the single CPU PMU
- * instance and register it with perf core.
- *
- * Returns 0 on success, -ENOSPC when a PMU is already registered, -ENOMEM
- * on allocation failure, or the error from PMU init / registration.  On
- * any failure the allocation is freed and the global cpu_pmu pointer is
- * left (or reset to) NULL so that it never refers to freed memory.
- */
-static int cpu_pmu_device_probe(struct platform_device *pdev)
-{
-	const struct of_device_id *of_id;
-	int (*init_fn)(struct nds32_pmu *nds32_pmu);
-	struct device_node *node = pdev->dev.of_node;
-	struct nds32_pmu *pmu;
-	int ret = -ENODEV;
-
-	if (cpu_pmu) {
-		pr_notice("[perf] attempt to register multiple PMU devices!\n");
-		return -ENOSPC;
-	}
-
-	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
-	if (!pmu)
-		return -ENOMEM;
-
-	/* Prefer the init function from the OF match, else probe the CPU. */
-	of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
-	if (node && of_id) {
-		init_fn = of_id->data;
-		ret = init_fn(pmu);
-	} else {
-		ret = probe_current_pmu(pmu);
-	}
-
-	if (ret) {
-		pr_notice("[perf] failed to probe PMU!\n");
-		goto out_free;
-	}
-
-	cpu_pmu = pmu;
-	cpu_pmu->plat_device = pdev;
-	cpu_pmu_init(cpu_pmu);
-	ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
-
-	if (!ret)
-		return 0;
-
-	pr_notice("[perf] failed to register PMU devices!\n");
-	/* Do not leave the global pointing at memory that is freed below. */
-	cpu_pmu = NULL;
-
-out_free:
-	kfree(pmu);
-	return ret;
-}
-
-/*
- * Platform driver for the PMU; matches by OF compatible or by the
- * "nds32-pfm" platform device id.  No remove callback is provided.
- */
-static struct platform_driver cpu_pmu_driver = {
- .driver = {
- .name = "nds32-pfm",
- .of_match_table = cpu_pmu_of_device_ids,
- },
- .probe = cpu_pmu_device_probe,
- .id_table = cpu_pmu_plat_device_ids,
-};
-
-/*
- * Initcall: register the PMU platform driver and log the outcome.
- * Returns the result of platform_driver_register().
- */
-static int __init register_pmu_driver(void)
-{
-	int rc = platform_driver_register(&cpu_pmu_driver);
-
-	if (!rc) {
-		pr_notice("[perf] PMU initialization done\n");
-		return rc;
-	}
-
-	pr_notice("[perf] PMU initialization failed\n");
-	return rc;
-}
-
-device_initcall(register_pmu_driver);
-
-/*
- * References: arch/nds32/kernel/traps.c:__dump()
- * You will need to know the NDS ABI first.
- *
- * Step @frame one level up the kernel stack, updating its lp and its
- * fp (CONFIG_FRAME_POINTER) or sp (otherwise).
- *
- * Returns 0 when the frame was advanced, -EPERM when the current frame
- * pointer / stack pointer cannot be followed any further.
- */
-static int unwind_frame_kernel(struct stackframe *frame)
-{
-	int graph = 0;
-#ifdef CONFIG_FRAME_POINTER
-	/* 0x3 means misalignment */
-	if (!kstack_end((void *)frame->fp) &&
-	    !((unsigned long)frame->fp & 0x3) &&
-	    ((unsigned long)frame->fp >= TASK_SIZE)) {
-		/*
-		 *	The array index is based on the ABI, the below graph
-		 *	illustrate the reasons.
-		 *	Function call procedure: "smw" and "lmw" will always
-		 *	update SP and FP for you automatically.
-		 *
-		 *	Stack                                 Relative Address
-		 *	|  |                                          0
-		 *	----
-		 *	|LP| <-- SP(before smw)  <-- FP(after smw)   -1
-		 *	----
-		 *	|FP|                                         -2
-		 *	----
-		 *	|  | <-- SP(after smw)                       -3
-		 */
-		frame->lp = ((unsigned long *)frame->fp)[-1];
-		frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
-		/* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
-		if (__kernel_text_address(frame->lp))
-			frame->lp = ftrace_graph_ret_addr
-						(NULL, &graph, frame->lp, NULL);
-
-		return 0;
-	} else {
-		return -EPERM;
-	}
-#else
-	/*
-	 * You can refer to arch/nds32/kernel/traps.c:__dump()
-	 * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
-	 * And, the "sp" is not always correct.
-	 *
-	 *	Stack                                 Relative Address
-	 *	|  |                                          0
-	 *	----
-	 *	|LP| <-- SP(before smw)                      -1
-	 *	----
-	 *	|  | <-- SP(after smw)                       -2
-	 *	----
-	 */
-	if (!kstack_end((void *)frame->sp)) {
-		frame->lp = ((unsigned long *)frame->sp)[1];
-		/* TODO: How to deal with the value in first
-		 * "sp" is not correct?
-		 */
-		/*
-		 * No task pointer is available here; pass NULL exactly as
-		 * the CONFIG_FRAME_POINTER branch does.
-		 */
-		if (__kernel_text_address(frame->lp))
-			frame->lp = ftrace_graph_ret_addr
-						(NULL, &graph, frame->lp, NULL);
-
-		/* Advance by one stack slot; sp is kept as an integer. */
-		frame->sp = (unsigned long)(((unsigned long *)frame->sp) + 1);
-
-		return 0;
-	} else {
-		return -EPERM;
-	}
-#endif
-}
-
-/*
- * Hand successive kernel stack frames to @fn_record.
- *
- * @frame is reported as-is first and then advanced in place with
- * unwind_frame_kernel(). The walk stops as soon as @fn_record returns
- * non-zero or the unwinder returns a negative value.
- *
- * @data is passed through untouched to every @fn_record call.
- */
-static void notrace
-walk_stackframe(struct stackframe *frame,
-		int (*fn_record)(struct stackframe *, void *),
-		void *data)
-{
-	do {
-		if (fn_record(frame, data))
-			return;
-	} while (unwind_frame_kernel(frame) >= 0);
-}
-
-/*
- * Gets called by walk_stackframe() for every stackframe. This is called
- * whilst unwinding the stackframe and is like a subroutine return, so the
- * value recorded is the frame's saved link pointer (return address).
- *
- * @fr:   frame currently being visited
- * @data: the struct perf_callchain_entry_ctx being filled
- *
- * Returns the result of perf_callchain_store(). walk_stackframe() stops on
- * any non-zero return, so the walk ends once the store is refused instead
- * of unwinding frames that can no longer be recorded.
- */
-static int callchain_trace(struct stackframe *fr, void *data)
-{
-	struct perf_callchain_entry_ctx *entry = data;
-
-	return perf_callchain_store(entry, fr->lp);
-}
-
-/*
- * Record the return address of one user stack frame and hand back the
- * frame pointer of the next one.
- *
- * A struct frame_tail is copied from user memory ending at @fp. Its saved
- * link pointer is stored in @entry and its saved frame pointer is returned.
- * 0 is returned when that user memory is not accessible or the copy fails,
- * in which case nothing is stored.
- *
- * Refer to unwind_frame_kernel() for an illustration of the layout.
- */
-static unsigned long
-user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
-{
-	struct frame_tail tail;
-	unsigned long *tail_uaddr;
-
-	tail_uaddr = (unsigned long *)(fp - (unsigned long)sizeof(tail));
-
-	/* Validate the range first; the copy must not sleep. */
-	if (!access_ok(tail_uaddr, sizeof(tail)) ||
-	    __copy_from_user_inatomic(&tail, tail_uaddr, sizeof(tail)))
-		return 0;
-
-	/* stack_lp is ((unsigned long *)fp)[-1] */
-	perf_callchain_store(entry, tail.stack_lp);
-
-	/* stack_fp is ((unsigned long *)fp)[FP_OFFSET] */
-	return tail.stack_fp;
-}
-
-/*
- * Same job as user_backtrace(), but for frames that use the
- * struct frame_tail_opt_size layout.
- *
- * The structure is copied from user memory ending at @fp. Its saved link
- * pointer is stored in @entry and its saved frame pointer is returned.
- * 0 is returned, and nothing stored, when the memory is not accessible or
- * the copy fails.
- */
-static unsigned long
-user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
-			unsigned long fp)
-{
-	struct frame_tail_opt_size tail;
-	unsigned long *tail_uaddr;
-
-	tail_uaddr = (unsigned long *)(fp - (unsigned long)sizeof(tail));
-
-	/* Validate the range first; the copy must not sleep. */
-	if (!access_ok(tail_uaddr, sizeof(tail)) ||
-	    __copy_from_user_inatomic(&tail, tail_uaddr, sizeof(tail)))
-		return 0;
-
-	perf_callchain_store(entry, tail.stack_lp);
-
-	return tail.stack_fp;
-}
-
-/*
- * Build the user-space part of a callchain.
- *
- * This will be called when the target is in user mode, and only when
- * "PERF_SAMPLE_CALLCHAIN" is used in
- * kernel/events/core.c:perf_prepare_sample()
- *
- * How to trigger perf_callchain_[user/kernel] :
- * $ perf record -e cpu-clock --call-graph fp ./program
- * $ perf report --call-graph
- *
- * The sampled ipc is always recorded. If the sampled frame pointer looks
- * usable, the word just below it is compared with the sampled lp (and, if
- * they match, the saved fp with the sampled gp) to choose which frame
- * layout to follow; lp is then recorded and the chain of saved frame
- * pointers is walked.
- *
- * NOTE(review): leaf_fp is a non-static file-scope variable that is reset
- * and rewritten on every call. Nothing in this function needs it to
- * outlive the call -- confirm there is no other user before making it a
- * local.
- */
-unsigned long leaf_fp;
-
-/* True while @fp still qualifies as a user frame pointer to follow. */
-static inline bool
-user_fp_walkable(struct perf_callchain_entry_ctx *entry,
-		 unsigned long fp, unsigned long sp)
-{
-	return entry->nr < PERF_MAX_STACK_DEPTH &&
-	       fp && !(fp & 0x7) && fp > sp;
-}
-
-void
-perf_callchain_user(struct perf_callchain_entry_ctx *entry,
-		    struct pt_regs *regs)
-{
-	struct frame_tail_opt_size tail_opt;
-	struct frame_tail tail;
-	unsigned long fp = regs->fp;
-	unsigned long gp = regs->gp;
-	unsigned long lp = regs->lp;
-	unsigned long sp = regs->sp;
-	unsigned long *uaddr;
-	bool opt_size = false;
-
-	leaf_fp = 0;
-
-	perf_callchain_store(entry, regs->ipc);
-
-	if (!user_fp_walkable(entry, fp, sp))
-		return;
-
-	/* Fetch the single word stored just below the frame pointer. */
-	uaddr = (unsigned long *)(fp - (unsigned long)sizeof(fp));
-	if (!access_ok(uaddr, sizeof(fp)) ||
-	    __copy_from_user_inatomic(&leaf_fp, uaddr, sizeof(fp)))
-		return;
-
-	if (leaf_fp != lp) {
-		/* this is a leaf function */
-		fp = leaf_fp;
-	} else {
-		/*
-		 * Maybe this is a non-leaf function built with optimize
-		 * for size, or maybe a function built without it; the
-		 * saved fp decides.
-		 */
-		uaddr = (unsigned long *)(fp - (unsigned long)sizeof(tail));
-		if (!access_ok(uaddr, sizeof(tail)) ||
-		    __copy_from_user_inatomic(&tail, uaddr, sizeof(tail)))
-			return;
-
-		if (tail.stack_fp != gp) {
-			/* function without optimize for size */
-			fp = tail.stack_fp;
-		} else {
-			/* non-leaf function with optimize for size */
-			uaddr = (unsigned long *)
-				(fp - (unsigned long)sizeof(tail_opt));
-			if (!access_ok(uaddr, sizeof(tail_opt)) ||
-			    __copy_from_user_inatomic(&tail_opt, uaddr,
-						      sizeof(tail_opt)))
-				return;
-
-			fp = tail_opt.stack_fp;
-			opt_size = true;
-		}
-	}
-
-	perf_callchain_store(entry, lp);
-
-	/*
-	 * Follow the previous functions' frames. sp trails fp so that the
-	 * "fp > sp" test only accepts frame pointers that keep increasing.
-	 */
-	while (user_fp_walkable(entry, fp, sp)) {
-		sp = fp;
-		fp = opt_size ? user_backtrace_opt_size(entry, fp)
-			      : user_backtrace(entry, fp);
-	}
-}
-
-/*
- * Build the kernel part of a callchain. This will be called when the
- * target is in kernel mode.
- *
- * A stackframe is seeded from the fp, lp and sp in @regs and then walked
- * with callchain_trace() recording every frame into @entry.
- */
-void
-perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
-		      struct pt_regs *regs)
-{
-	struct stackframe start;
-
-	start.sp = regs->sp;
-	start.lp = regs->lp;
-	start.fp = regs->fp;
-
-	walk_stackframe(&start, callchain_trace, entry);
-}
-
-/* Report the instruction pointer held in @regs for a perf sample. */
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
-	unsigned long ip = instruction_pointer(regs);
-
-	return ip;
-}
-
-/*
- * Classify a sample by privilege level: PERF_RECORD_MISC_USER when @regs
- * describes user mode, PERF_RECORD_MISC_KERNEL otherwise.
- */
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
-	int flags;
-
-	flags = user_mode(regs) ? PERF_RECORD_MISC_USER
-				: PERF_RECORD_MISC_KERNEL;
-
-	return flags;
-}