From 1201a5a25cf0f7d8019d106a81c64f3a7397d17c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 29 Jun 2018 11:45:36 -0700 Subject: perf/arm-cci: Remove VLA usage In the quest to remove all stack VLA usage from the kernel[1], this removes the VLA in favor of a maximum size and adds a sanity check at registration time. The sizes are all explicitly enumerated already, so this just collects them into macros. [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Signed-off-by: Kees Cook Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 0d09d8e669cd..1bfeb160c5b1 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -53,6 +53,16 @@ enum { CCI_IF_MAX, }; +#define NUM_HW_CNTRS_CII_4XX 4 +#define NUM_HW_CNTRS_CII_5XX 8 +#define NUM_HW_CNTRS_MAX NUM_HW_CNTRS_CII_5XX + +#define FIXED_HW_CNTRS_CII_4XX 1 +#define FIXED_HW_CNTRS_CII_5XX 0 +#define FIXED_HW_CNTRS_MAX FIXED_HW_CNTRS_CII_4XX + +#define HW_CNTRS_MAX (NUM_HW_CNTRS_MAX + FIXED_HW_CNTRS_MAX) + struct event_range { u32 min; u32 max; @@ -633,8 +643,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) { int i; struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; - - DECLARE_BITMAP(mask, cci_pmu->num_cntrs); + DECLARE_BITMAP(mask, HW_CNTRS_MAX); bitmap_zero(mask, cci_pmu->num_cntrs); for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { @@ -940,7 +949,7 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) { int i; - DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); + DECLARE_BITMAP(saved_mask, HW_CNTRS_MAX); bitmap_zero(saved_mask, cci_pmu->num_cntrs); pmu_save_counters(cci_pmu, saved_mask); @@ -1245,7 +1254,7 @@ static int validate_group(struct perf_event *event) { struct perf_event *sibling, *leader = event->group_leader; struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)]; + unsigned long mask[BITS_TO_LONGS(HW_CNTRS_MAX)]; struct cci_pmu_hw_events fake_pmu = { /* * Initialise the fake PMU. 
We only need to populate the @@ -1403,6 +1412,11 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) char *name = model->name; u32 num_cntrs; + if (WARN_ON(model->num_hw_cntrs > NUM_HW_CNTRS_MAX)) + return -EINVAL; + if (WARN_ON(model->fixed_hw_cntrs > FIXED_HW_CNTRS_MAX)) + return -EINVAL; + pmu_event_attr_group.attrs = model->event_attrs; pmu_format_attr_group.attrs = model->format_attrs; @@ -1455,8 +1469,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { #ifdef CONFIG_ARM_CCI400_PMU [CCI400_R0] = { .name = "CCI_400", - .fixed_hw_cntrs = 1, /* Cycle counter */ - .num_hw_cntrs = 4, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */ + .num_hw_cntrs = NUM_HW_CNTRS_CII_4XX, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, .event_attrs = cci400_r0_pmu_event_attrs, @@ -1475,8 +1489,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { }, [CCI400_R1] = { .name = "CCI_400_r1", - .fixed_hw_cntrs = 1, /* Cycle counter */ - .num_hw_cntrs = 4, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */ + .num_hw_cntrs = NUM_HW_CNTRS_CII_4XX, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, .event_attrs = cci400_r1_pmu_event_attrs, @@ -1497,8 +1511,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { #ifdef CONFIG_ARM_CCI5xx_PMU [CCI500_R0] = { .name = "CCI_500", - .fixed_hw_cntrs = 0, - .num_hw_cntrs = 8, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX, + .num_hw_cntrs = NUM_HW_CNTRS_CII_5XX, .cntr_size = SZ_64K, .format_attrs = cci5xx_pmu_format_attrs, .event_attrs = cci5xx_pmu_event_attrs, @@ -1521,8 +1535,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { }, [CCI550_R0] = { .name = "CCI_550", - .fixed_hw_cntrs = 0, - .num_hw_cntrs = 8, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX, + .num_hw_cntrs = NUM_HW_CNTRS_CII_5XX, .cntr_size = SZ_64K, .format_attrs = cci5xx_pmu_format_attrs, .event_attrs = cci5xx_pmu_event_attrs, -- cgit v1.2.3 From 59b62e7ad0874681dc4f84cac90cc991dc265809 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 4 Jul 2018 11:50:50 +0100 Subject: drivers/perf: Initialise return value in armpmu_request_irqs() If a PMU doesn't have any IRQs, we should return 0 from armpmu_request_irqs(), rather than an uninitialised stack value. Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 971ff336494a..96075cecb0ae 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -160,7 +160,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) static int armpmu_request_irqs(struct arm_pmu *armpmu) { struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int cpu, err; + int cpu, err = 0; for_each_cpu(cpu, &armpmu->supported_cpus) { int irq = per_cpu(hw_events->irq, cpu); -- cgit v1.2.3 From 8d3e994241e6bcc7ead2b918c4f15b7683afa90a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:57:58 +0100 Subject: arm_pmu: Clean up maximum period handling Each PMU defines its max_period as the maximum value that its counter can count. Since all the PMU backends support 32bit counters by default, let us remove the redundant field. No functional changes.
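As a quick illustration of the fixed-width arithmetic this relies on (an editor's sketch, not part of the patch), a single 32bit helper makes counter wrap-around fall out of the masking done in armpmu_event_update():

static inline u64 arm_pmu_max_period(void)
{
	return (1ULL << 32) - 1;	/* all backends use 32bit counters */
}

/*
 * Example: the counter wrapped from 0xfffffff0 to 0x00000010. The raw
 * subtraction underflows, but masking with the 32bit max period
 * recovers the true delta of 0x20 events.
 */
u64 delta = (0x00000010ULL - 0xfffffff0ULL) & arm_pmu_max_period();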
Cc: Will Deacon Acked-by: Mark Rutland Reviewed-by: Julien Thierry Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 2 -- arch/arm/kernel/perf_event_v7.c | 1 - arch/arm/kernel/perf_event_xscale.c | 2 -- arch/arm64/kernel/perf_event.c | 1 - drivers/perf/arm_pmu.c | 16 ++++++++++++---- include/linux/perf/arm_pmu.h | 1 - 6 files changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers/perf') diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index be42c4f66a40..f64a6bfebcec 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -495,7 +495,6 @@ static void armv6pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; } static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) @@ -546,7 +545,6 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6mpcore_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 5a5116794440..2cf1ca2925c8 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1170,7 +1170,6 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; - cpu_pmu->max_period = (1LLU << 32) - 1; }; static void armv7_read_num_pmnc_events(void *info) diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 88d1a76f5367..c4f029458b52 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -374,7 +374,6 @@ static int xscale1pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = xscale1pmu_stop; cpu_pmu->map_event = xscale_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } @@ -743,7 +742,6 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = xscale2pmu_stop; cpu_pmu->map_event = xscale_map_event; cpu_pmu->num_events = 5; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 33147aacdafd..678ecffd3724 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -960,7 +960,6 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->start = armv8pmu_start, cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, - cpu_pmu->max_period = (1LLU << 32) - 1, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; return 0; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index a6347d487635..6ddc00da5373 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -28,6 +28,11 @@ static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); +static inline u64 arm_pmu_max_period(void) +{ + return (1ULL << 32) - 1; +} + static int armpmu_map_cache_event(const unsigned (*cache_map) [PERF_COUNT_HW_CACHE_MAX] @@ -114,8 +119,10 @@ int armpmu_event_set_period(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; + u64 max_period; int ret = 0; + max_period = arm_pmu_max_period(); if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); @@ -136,8 +143,8 @@ int 
armpmu_event_set_period(struct perf_event *event) * effect we are reducing max_period to account for * interrupt latency (and we are being very conservative). */ - if (left > (armpmu->max_period >> 1)) - left = armpmu->max_period >> 1; + if (left > (max_period >> 1)) + left = (max_period >> 1); local64_set(&hwc->prev_count, (u64)-left); @@ -153,6 +160,7 @@ u64 armpmu_event_update(struct perf_event *event) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u64 delta, prev_raw_count, new_raw_count; + u64 max_period = arm_pmu_max_period(); again: prev_raw_count = local64_read(&hwc->prev_count); @@ -162,7 +170,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + delta = (new_raw_count - prev_raw_count) & max_period; local64_add(delta, &event->count); local64_sub(delta, &hwc->period_left); @@ -402,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event) * is far less likely to overtake the previous one unless * you have some serious IRQ latency issues. */ - hwc->sample_period = armpmu->max_period >> 1; + hwc->sample_period = arm_pmu_max_period() >> 1; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index ad5444491975..12c30a22fc8d 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -94,7 +94,6 @@ struct arm_pmu { void (*reset)(void *); int (*map_event)(struct perf_event *event); int num_events; - u64 max_period; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); -- cgit v1.2.3 From e2da97d328d4951d25f6634eda7213f7257417b6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:58:00 +0100 Subject: arm_pmu: Add support for 64bit event counters Each PMU has a set of 32bit event counters. But in some special cases, the events could be counted using counters which are effectively 64bit wide. e.g., Arm V8 PMUv3 has a 64bit cycle counter which can count only the CPU cycles. Also, the PMU can chain the event counters to effectively count as a 64bit counter. Add support for tracking the events that use 64bit counters. This only affects the periods set for each counter in the core driver.
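In practice this makes the counter width a per-event property. A minimal sketch of the selection (editor's illustration; the real helper added in the diff below is arm_pmu_event_max_period()):

/*
 * A 64bit event gets the full counter range; everything else keeps
 * the old 32bit behaviour. Since the default sample period is half
 * of this range, a 64bit event only overflows after 2^63 counts.
 */
static inline u64 example_event_max_period(struct perf_event *event)
{
	if (event->hw.flags & ARMPMU_EVT_64BIT)
		return GENMASK_ULL(63, 0);	/* 0xffffffffffffffff */
	return GENMASK_ULL(31, 0);		/* 0xffffffff */
}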
Cc: Will Deacon Reviewed-by: Julien Thierry Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 16 ++++++++++------ include/linux/perf/arm_pmu.h | 6 ++++++ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6ddc00da5373..8cad6b535a2c 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -28,9 +28,12 @@ static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); -static inline u64 arm_pmu_max_period(void) +static inline u64 arm_pmu_event_max_period(struct perf_event *event) { - return (1ULL << 32) - 1; + if (event->hw.flags & ARMPMU_EVT_64BIT) + return GENMASK_ULL(63, 0); + else + return GENMASK_ULL(31, 0); } static int @@ -122,7 +125,7 @@ int armpmu_event_set_period(struct perf_event *event) u64 max_period; int ret = 0; - max_period = arm_pmu_max_period(); + max_period = arm_pmu_event_max_period(event); if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); @@ -148,7 +151,7 @@ int armpmu_event_set_period(struct perf_event *event) local64_set(&hwc->prev_count, (u64)-left); - armpmu->write_counter(event, (u64)(-left) & 0xffffffff); + armpmu->write_counter(event, (u64)(-left) & max_period); perf_event_update_userpage(event); @@ -160,7 +163,7 @@ u64 armpmu_event_update(struct perf_event *event) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u64 delta, prev_raw_count, new_raw_count; - u64 max_period = arm_pmu_max_period(); + u64 max_period = arm_pmu_event_max_period(event); again: prev_raw_count = local64_read(&hwc->prev_count); @@ -368,6 +371,7 @@ __hw_perf_event_init(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int mapping; + hwc->flags = 0; mapping = armpmu->map_event(event); if (mapping < 0) { @@ -410,7 +414,7 @@ __hw_perf_event_init(struct perf_event *event) * is far less likely to overtake the previous one unless * you have some serious IRQ latency issues. */ - hwc->sample_period = arm_pmu_max_period() >> 1; + hwc->sample_period = arm_pmu_event_max_period(event) >> 1; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index f7126a21df30..10f92e1d8e7b 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -25,6 +25,12 @@ */ #define ARMPMU_MAX_HWEVENTS 32 +/* + * ARM PMU hw_event flags + */ +/* Event uses a 64bit counter */ +#define ARMPMU_EVT_64BIT 1 + #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF -- cgit v1.2.3 From 7dfc8db1d117ae08c649266d5459ac8b10800d6e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:58:01 +0100 Subject: arm_pmu: Tidy up clear_event_idx call backs The armpmu uses the get_event_idx callback to allocate an event counter for a given event, which marks the selected counter as "used". Now, when we delete the counter, the arm_pmu goes ahead and clears the "used" bit and then invokes the "clear_event_idx" callback, which kind of splits the job between the core code and the backend. To keep things tidy, mandate the implementation of clear_event_idx() and add it for existing backends. This will be useful for adding the chained event support, where we leave the event idx maintenance to the backend.
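In distilled form, the deletion path before and after this change looks like this (editor's sketch of the core logic, not the literal patch; see the armpmu_del() hunk later in this series):

/* Before: the core clears the bit, the backend hook is optional */
clear_bit(idx, hw_events->used_mask);
if (armpmu->clear_event_idx)
	armpmu->clear_event_idx(hw_events, event);

/* After: the mandatory backend callback owns the used_mask bookkeeping */
armpmu->clear_event_idx(hw_events, event);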
Also, when an event is removed from the PMU, reset the hw.idx to indicate that a counter is not allocated for this event, to help the backends do better checks. This will also be used for the chain counter support. Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Julien Thierry Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 8 ++++++++ arch/arm/kernel/perf_event_v7.c | 9 +++++++++ arch/arm/kernel/perf_event_xscale.c | 8 ++++++++ arch/arm64/kernel/perf_event.c | 7 +++++++ drivers/perf/arm_pmu.c | 7 +++---- 5 files changed, 35 insertions(+), 4 deletions(-) (limited to 'drivers/perf') diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 0729f9841ef4..1ae99deeec54 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -411,6 +411,12 @@ armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, } } +static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + static void armv6pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; @@ -491,6 +497,7 @@ static void armv6pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv6pmu_read_counter; cpu_pmu->write_counter = armv6pmu_write_counter; cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; cpu_pmu->start = armv6pmu_start; cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6_map_event; @@ -541,6 +548,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv6pmu_read_counter; cpu_pmu->write_counter = armv6pmu_write_counter; cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; cpu_pmu->start = armv6pmu_start; cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6mpcore_map_event; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 973043dd6187..a4fb0f8b8f84 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1058,6 +1058,12 @@ static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + /* * Add an event filter to a given event. This will only work for PMUv2 PMUs.
*/ @@ -1167,6 +1173,7 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv7pmu_read_counter; cpu_pmu->write_counter = armv7pmu_write_counter; cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx; cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; @@ -1637,6 +1644,7 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, bool venum_event = EVENT_VENUM(hwc->config_base); bool krait_event = EVENT_CPU(hwc->config_base); + armv7pmu_clear_event_idx(cpuc, event); if (venum_event || krait_event) { bit = krait_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); @@ -1966,6 +1974,7 @@ static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, bool venum_event = EVENT_VENUM(hwc->config_base); bool scorpion_event = EVENT_CPU(hwc->config_base); + armv7pmu_clear_event_idx(cpuc, event); if (venum_event || scorpion_event) { bit = scorpion_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 942230fe0426..f6cdcacfb96d 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -292,6 +292,12 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, } } +static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + static void xscale1pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; @@ -370,6 +376,7 @@ static int xscale1pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = xscale1pmu_read_counter; cpu_pmu->write_counter = xscale1pmu_write_counter; cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; cpu_pmu->start = xscale1pmu_start; cpu_pmu->stop = xscale1pmu_stop; cpu_pmu->map_event = xscale_map_event; @@ -738,6 +745,7 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = xscale2pmu_read_counter; cpu_pmu->write_counter = xscale2pmu_write_counter; cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; cpu_pmu->start = xscale2pmu_start; cpu_pmu->stop = xscale2pmu_stop; cpu_pmu->map_event = xscale_map_event; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 66a2ffdca6dd..ac66851d4b13 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -778,6 +778,12 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + /* * Add an event filter to a given event. This will only work for PMUv2 PMUs. 
*/ @@ -956,6 +962,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv8pmu_read_counter, cpu_pmu->write_counter = armv8pmu_write_counter, cpu_pmu->get_event_idx = armv8pmu_get_event_idx, + cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx, cpu_pmu->start = armv8pmu_start, cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 8cad6b535a2c..a28881058f18 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -238,11 +238,10 @@ armpmu_del(struct perf_event *event, int flags) armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - if (armpmu->clear_event_idx) - armpmu->clear_event_idx(hw_events, event); - + armpmu->clear_event_idx(hw_events, event); perf_event_update_userpage(event); + /* Clear the allocated counter */ + hwc->idx = -1; } static int -- cgit v1.2.3 From c13207905340d85eaddd85b6d2868218f324b180 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 10 Jul 2018 09:58:04 +0100 Subject: arm64: perf: Add support for chaining event counters Add support for 64bit events by using chained event counters and 64bit cycle counters. PMUv3 allows chaining a pair of adjacent 32-bit counters, effectively forming a 64-bit counter. The low/even counter is programmed to count the event of interest, and the high/odd counter is programmed to count the CHAIN event, taken when the low/even counter overflows. For CPU cycles, when 64bit mode is requested, the cycle counter is used in 64bit mode. If the cycle counter is not available, we fall back to chaining. Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 185 +++++++++++++++++++++++++++++++++++------ drivers/perf/arm_pmu.c | 9 +- 2 files changed, 160 insertions(+), 34 deletions(-) (limited to 'drivers/perf') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b414d81b80ca..dfff5ed5c625 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -446,9 +446,16 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { }; PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(long, "config1:0"); + +static inline bool armv8pmu_event_is_64bit(struct perf_event *event) +{ + return event->attr.config1 & 0x1; +} static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, + &format_attr_long.attr, NULL, }; @@ -465,6 +472,21 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { #define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) +/* + * We must chain two programmable counters for 64 bit events, + * except when we have allocated the 64bit cycle counter (for CPU + * cycles event). This must be called only when the event has + * a counter allocated.
+ */ +static inline bool armv8pmu_event_is_chained(struct perf_event *event) +{ + int idx = event->hw.idx; + + return !WARN_ON(idx < 0) && + armv8pmu_event_is_64bit(event) && + (idx != ARMV8_IDX_CYCLE_COUNTER); +} + /* * ARMv8 low level PMU access */ @@ -516,12 +538,23 @@ static inline u32 armv8pmu_read_evcntr(int idx) return read_sysreg(pmxevcntr_el0); } +static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + u64 val = 0; + + val = armv8pmu_read_evcntr(idx); + if (armv8pmu_event_is_chained(event)) + val = (val << 32) | armv8pmu_read_evcntr(idx - 1); + return val; +} + static inline u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u32 value = 0; + u64 value = 0; if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", @@ -529,7 +562,7 @@ static inline u64 armv8pmu_read_counter(struct perf_event *event) else if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); else - value = armv8pmu_read_evcntr(idx); + value = armv8pmu_read_hw_counter(event); return value; } @@ -540,6 +573,19 @@ static inline void armv8pmu_write_evcntr(int idx, u32 value) write_sysreg(value, pmxevcntr_el0); } +static inline void armv8pmu_write_hw_counter(struct perf_event *event, + u64 value) +{ + int idx = event->hw.idx; + + if (armv8pmu_event_is_chained(event)) { + armv8pmu_write_evcntr(idx, upper_32_bits(value)); + armv8pmu_write_evcntr(idx - 1, lower_32_bits(value)); + } else { + armv8pmu_write_evcntr(idx, value); + } +} + static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); @@ -551,14 +597,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) { /* - * Set the upper 32bits as this is a 64bit counter but we only - * count using the lower 32bits and we want an interrupt when - * it overflows. + * The cycles counter is really a 64-bit counter. + * When treating it as a 32-bit counter, we only count + * the lower 32 bits, and set the upper 32-bits so that + * we get an interrupt upon 32-bit overflow. */ - value |= 0xffffffff00000000ULL; + if (!armv8pmu_event_is_64bit(event)) + value |= 0xffffffff00000000ULL; write_sysreg(value, pmccntr_el0); } else - armv8pmu_write_evcntr(idx, value); + armv8pmu_write_hw_counter(event, value); } static inline void armv8pmu_write_evtype(int idx, u32 val) @@ -568,6 +616,27 @@ static inline void armv8pmu_write_evtype(int idx, u32 val) write_sysreg(val, pmxevtyper_el0); } +static inline void armv8pmu_write_event_type(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * For chained events, the low counter is programmed to count + * the event of interest and the high counter is programmed + * with CHAIN event code with filters set to count at all ELs. 
+ */ + if (armv8pmu_event_is_chained(event)) { + u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | + ARMV8_PMU_INCLUDE_EL2; + + armv8pmu_write_evtype(idx - 1, hwc->config_base); + armv8pmu_write_evtype(idx, chain_evt); + } else { + armv8pmu_write_evtype(idx, hwc->config_base); + } +} + static inline int armv8pmu_enable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -575,6 +644,16 @@ static inline int armv8pmu_enable_counter(int idx) return idx; } +static inline void armv8pmu_enable_event_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + + armv8pmu_enable_counter(idx); + if (armv8pmu_event_is_chained(event)) + armv8pmu_enable_counter(idx - 1); + isb(); +} + static inline int armv8pmu_disable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -582,6 +661,16 @@ static inline int armv8pmu_disable_counter(int idx) return idx; } +static inline void armv8pmu_disable_event_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_chained(event)) + armv8pmu_disable_counter(idx - 1); + armv8pmu_disable_counter(idx); +} + static inline int armv8pmu_enable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -589,6 +678,11 @@ static inline int armv8pmu_enable_intens(int idx) return idx; } +static inline int armv8pmu_enable_event_irq(struct perf_event *event) +{ + return armv8pmu_enable_intens(event->hw.idx); +} + static inline int armv8pmu_disable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -601,6 +695,11 @@ static inline int armv8pmu_disable_intens(int idx) return idx; } +static inline int armv8pmu_disable_event_irq(struct perf_event *event) +{ + return armv8pmu_disable_intens(event->hw.idx); +} + static inline u32 armv8pmu_getreset_flags(void) { u32 value; @@ -618,10 +717,8 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -632,22 +729,22 @@ static void armv8pmu_enable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Set event (if destined for PMNx counters). 
*/ - armv8pmu_write_evtype(idx, hwc->config_base); + armv8pmu_write_event_type(event); /* * Enable interrupt for this counter */ - armv8pmu_enable_intens(idx); + armv8pmu_enable_event_irq(event); /* * Enable counter */ - armv8pmu_enable_counter(idx); + armv8pmu_enable_event_counter(event); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } @@ -655,10 +752,8 @@ static void armv8pmu_enable_event(struct perf_event *event) static void armv8pmu_disable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Disable counter and interrupt @@ -668,12 +763,12 @@ static void armv8pmu_disable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Disable interrupt for this counter */ - armv8pmu_disable_intens(idx); + armv8pmu_disable_event_irq(event); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } @@ -767,10 +862,42 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) return IRQ_HANDLED; } +static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) +{ + int idx; + + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; +} + +static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) +{ + int idx; + + /* + * Chaining requires two consecutive event counters, where + * the lower idx must be even. + */ + for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { + if (!test_and_set_bit(idx, cpuc->used_mask)) { + /* Check if the preceding even counter is available */ + if (!test_and_set_bit(idx - 1, cpuc->used_mask)) + return idx; + /* Release the Odd counter */ + clear_bit(idx, cpuc->used_mask); + } + } + return -EAGAIN; +} + static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { - int idx; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; @@ -784,19 +911,20 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Otherwise use events counters */ - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. 
*/ - return -EAGAIN; + if (armv8pmu_event_is_64bit(event)) + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + else + return armv8pmu_get_single_idx(cpuc, cpu_pmu); } static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) + struct perf_event *event) { - clear_bit(event->hw.idx, cpuc->used_mask); + int idx = event->hw.idx; + + clear_bit(idx, cpuc->used_mask); + if (armv8pmu_event_is_chained(event)) + clear_bit(idx - 1, cpuc->used_mask); } /* @@ -871,6 +999,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); + if (armv8pmu_event_is_64bit(event)) + event->hw.flags |= ARMPMU_EVT_64BIT; + /* Only expose micro/arch events supported by this PMU */ if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index a28881058f18..7f01f6f60b87 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -665,14 +665,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) int idx; for (idx = 0; idx < armpmu->num_events; idx++) { - /* - * If the counter is not used skip it, there is no - * need of stopping/restarting it. - */ - if (!test_bit(idx, hw_events->used_mask)) - continue; - event = hw_events->events[idx]; + if (!event) + continue; switch (cmd) { case CPU_PM_ENTER: -- cgit v1.2.3 From 06060ea7fb5b6b9b4c3251e0dcabbcca8ad91674 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 19 Jul 2018 19:26:40 +0800 Subject: drivers/perf: hisi: update the sccl_id/ccl_id when MT is supported The MT bit in MPIDR_EL1 is now supported on certain HiSilicon platforms, so the mapping between sccl_id/ccl_id and affinity level needs to be updated from the generic encoding we originally used. Cc: John Garry Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Shaokun Zhang [will: fixed comment] Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 44df61397a38..9efd2413240c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -350,19 +350,21 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) /* * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. - * If multi-threading is supported, SCCL_ID is in MPIDR[aff3] and CCL_ID - * is in MPIDR[aff2]; if not, SCCL_ID is in MPIDR[aff2] and CCL_ID is - * in MPIDR[aff1]. If this changes in future, this shall be updated. + * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2] + * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID + * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1].
*/ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) { u64 mpidr = read_cpuid_mpidr(); if (mpidr & MPIDR_MT_BITMASK) { + int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + if (sccl_id) - *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3); + *sccl_id = aff2 >> 3; if (ccl_id) - *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + *ccl_id = aff2 & 0x7; } else { if (sccl_id) *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); -- cgit v1.2.3 From 809092dc3e606f3508b53baa624b27bfff8f0e7f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 26 Jul 2018 12:23:31 +0100 Subject: drivers/perf: arm-ccn: Use devm_ioremap_resource() to map memory Instead of checking the return value of platform_get_resource(), we can use devm_ioremap_resource(), which performs the NULL pointer check and the memory region request for us. devm_ioremap_resource is designed to replace calls to devm_request_mem_region followed by devm_ioremap, so let's use it. Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/perf') diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index b416ee18e6bb..4b15c36f4631 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1485,17 +1485,9 @@ static int arm_ccn_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ccn); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -EINVAL; - - if (!devm_request_mem_region(ccn->dev, res->start, - resource_size(res), pdev->name)) - return -EBUSY; - - ccn->base = devm_ioremap(ccn->dev, res->start, - resource_size(res)); - if (!ccn->base) - return -EFAULT; + ccn->base = devm_ioremap_resource(ccn->dev, res); + if (IS_ERR(ccn->base)) + return PTR_ERR(ccn->base); res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res) -- cgit v1.2.3
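Returning to the hisi sccl_id/ccl_id change above, a worked example of the new decoding (editor's illustration; the MPIDR value is hypothetical):

u64 mpidr = 0x812b0000;			/* hypothetical: MT bit (24) set, Aff2 = 0x2b */
int aff2 = (mpidr >> 16) & 0xff;	/* MPIDR_AFFINITY_LEVEL(mpidr, 2) */
int sccl_id = aff2 >> 3;		/* upper 5 bits of Aff2 -> 5 */
int ccl_id = aff2 & 0x7;		/* low 3 bits of Aff2 -> 3 */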