summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/perf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-07 13:55:45 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-07 13:55:45 -0700
commitd691b7e7d1b5186eae62fd32adee65d3316bfdf6 (patch)
tree3808f7deab74f68267b9fdd6a35dcda9e50142aa /arch/powerpc/perf
parentb59eea554f57befa2aa3172fcb63e521bdd850dd (diff)
parent1e0fc9d1eb2b0241a03e0a02bcdb9b5b641b9d35 (diff)
downloadlinux-d691b7e7d1b5186eae62fd32adee65d3316bfdf6.tar.bz2
Merge tag 'powerpc-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: "Highlights include: - Support for STRICT_KERNEL_RWX on 64-bit server CPUs. - Platform support for FSP2 (476fpe) board - Enable ZONE_DEVICE on 64-bit server CPUs. - Generic & powerpc spin loop primitives to optimise busy waiting - Convert VDSO update function to use new update_vsyscall() interface - Optimisations to hypercall/syscall/context-switch paths - Improvements to the CPU idle code on Power8 and Power9. As well as many other fixes and improvements. Thanks to: Akshay Adiga, Andrew Donnellan, Andrew Jeffery, Anshuman Khandual, Anton Blanchard, Balbir Singh, Benjamin Herrenschmidt, Christophe Leroy, Christophe Lombard, Colin Ian King, Dan Carpenter, Gautham R. Shenoy, Hari Bathini, Ian Munsie, Ivan Mikhaylov, Javier Martinez Canillas, Madhavan Srinivasan, Masahiro Yamada, Matt Brown, Michael Neuling, Michal Suchanek, Murilo Opsfelder Araujo, Naveen N. Rao, Nicholas Piggin, Oliver O'Halloran, Paul Mackerras, Pavel Machek, Russell Currey, Santosh Sivaraj, Stephen Rothwell, Thiago Jung Bauermann, Yang Li" * tag 'powerpc-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (158 commits) powerpc/Kconfig: Enable STRICT_KERNEL_RWX for some configs powerpc/mm/radix: Implement STRICT_RWX/mark_rodata_ro() for Radix powerpc/mm/hash: Implement mark_rodata_ro() for hash powerpc/vmlinux.lds: Align __init_begin to 16M powerpc/lib/code-patching: Use alternate map for patch_instruction() powerpc/xmon: Add patch_instruction() support for xmon powerpc/kprobes/optprobes: Use patch_instruction() powerpc/kprobes: Move kprobes over to patch_instruction() powerpc/mm/radix: Fix execute permissions for interrupt_vectors powerpc/pseries: Fix passing of pp0 in updatepp() and updateboltedpp() powerpc/64s: Blacklist rtas entry/exit from kprobes powerpc/64s: Blacklist functions invoked on a trap powerpc/64s: Un-blacklist system_call() from kprobes powerpc/64s: Move system_call() symbol to just after setting MSR_EE powerpc/64s: Blacklist system_call() and system_call_common() from kprobes powerpc/64s: Convert .L__replay_interrupt_return to a local label powerpc64/elfv1: Only dereference function descriptor for non-text symbols cxl: Export library to support IBM XSL powerpc/dts: Use #include "..." to include local DT powerpc/perf/hv-24x7: Aggregate result elements on POWER9 SMT8 ...
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r--arch/powerpc/perf/hv-24x7.c242
-rw-r--r--arch/powerpc/perf/hv-24x7.h69
-rw-r--r--arch/powerpc/perf/power9-events-list.h4
-rw-r--r--arch/powerpc/perf/power9-pmu.c8
4 files changed, 250 insertions, 73 deletions
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 7b2ca16b1eb4..9c88b82f6229 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <asm/cputhreads.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
@@ -27,6 +28,12 @@
#include "hv-24x7-catalog.h"
#include "hv-common.h"
+/* Version of the 24x7 hypervisor API that we should use in this machine. */
+static int interface_version;
+
+/* Whether we have to aggregate result data for some domains. */
+static bool aggregate_result_elements;
+
static bool domain_is_valid(unsigned domain)
{
switch (domain) {
@@ -54,6 +61,15 @@ static bool is_physical_domain(unsigned domain)
}
}
+/* Domains for which more than one result element are returned for each event. */
+static bool domain_needs_aggregation(unsigned int domain)
+{
+ return aggregate_result_elements &&
+ (domain == HV_PERF_DOMAIN_PHYS_CORE ||
+ (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
+ domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
+}
+
static const char *domain_name(unsigned domain)
{
if (!domain_is_valid(domain))
@@ -74,7 +90,11 @@ static const char *domain_name(unsigned domain)
static bool catalog_entry_domain_is_valid(unsigned domain)
{
- return is_physical_domain(domain);
+ /* POWER8 doesn't support virtual domains. */
+ if (interface_version == 1)
+ return is_physical_domain(domain);
+ else
+ return domain_is_valid(domain);
}
/*
@@ -166,6 +186,12 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static unsigned int max_num_requests(int interface_version)
+{
+ return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer))
+ / H24x7_REQUEST_SIZE(interface_version);
+}
+
static char *event_name(struct hv_24x7_event_data *ev, int *len)
{
*len = be16_to_cpu(ev->event_name_len) - 2;
@@ -260,9 +286,8 @@ static void *event_end(struct hv_24x7_event_data *ev, void *end)
return start + nl + dl + ldl;
}
-static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
- unsigned long version,
- unsigned long index)
+static long h_get_24x7_catalog_page_(unsigned long phys_4096,
+ unsigned long version, unsigned long index)
{
pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
phys_4096, version, index);
@@ -273,8 +298,7 @@ static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
phys_4096, version, index);
}
-static unsigned long h_get_24x7_catalog_page(char page[],
- u64 version, u32 index)
+static long h_get_24x7_catalog_page(char page[], u64 version, u32 index)
{
return h_get_24x7_catalog_page_(virt_to_phys(page),
version, index);
@@ -664,13 +688,13 @@ static int create_events_from_catalog(struct attribute ***events_,
struct attribute ***event_descs_,
struct attribute ***event_long_descs_)
{
- unsigned long hret;
+ long hret;
size_t catalog_len, catalog_page_len, event_entry_count,
event_data_len, event_data_offs,
event_data_bytes, junk_events, event_idx, event_attr_ct, i,
attr_max, event_idx_last, desc_ct, long_desc_ct;
ssize_t ct, ev_len;
- uint32_t catalog_version_num;
+ uint64_t catalog_version_num;
struct attribute **events, **event_descs, **event_long_descs;
struct hv_24x7_catalog_page_0 *page_0 =
kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
@@ -706,8 +730,8 @@ static int create_events_from_catalog(struct attribute ***events_,
event_data_offs = be16_to_cpu(page_0->event_data_offs);
event_data_len = be16_to_cpu(page_0->event_data_len);
- pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
- (size_t)catalog_version_num, catalog_len,
+ pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n",
+ catalog_version_num, catalog_len,
event_entry_count, event_data_offs, event_data_len);
if ((MAX_4K < event_data_len)
@@ -761,8 +785,8 @@ static int create_events_from_catalog(struct attribute ***events_,
catalog_version_num,
i + event_data_offs);
if (hret) {
- pr_err("failed to get event data in page %zu\n",
- i + event_data_offs);
+ pr_err("Failed to get event data in page %zu: rc=%ld\n",
+ i + event_data_offs, hret);
ret = -EIO;
goto e_event_data;
}
@@ -903,7 +927,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
{
- unsigned long hret;
+ long hret;
ssize_t ret = 0;
size_t catalog_len = 0, catalog_page_len = 0;
loff_t page_offset = 0;
@@ -988,7 +1012,7 @@ static ssize_t _name##_show(struct device *dev, \
struct device_attribute *dev_attr, \
char *buf) \
{ \
- unsigned long hret; \
+ long hret; \
ssize_t ret = 0; \
void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); \
struct hv_24x7_catalog_page_0 *page_0 = page; \
@@ -1040,21 +1064,6 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
-static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
- struct hv_24x7_data_result_buffer *result_buffer,
- unsigned long ret)
-{
- struct hv_24x7_request *req;
-
- req = &request_buffer->requests[0];
- pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => "
- "ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
- req->performance_domain, req->data_offset,
- req->starting_ix, req->starting_lpar_ix, ret, ret,
- result_buffer->detailed_rc,
- result_buffer->failing_request_ix);
-}
-
/*
* Start the process for a new H_GET_24x7_DATA hcall.
*/
@@ -1062,10 +1071,10 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
struct hv_24x7_data_result_buffer *result_buffer)
{
- memset(request_buffer, 0, 4096);
- memset(result_buffer, 0, 4096);
+ memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
+ memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
- request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
+ request_buffer->interface_version = interface_version;
/* memset above set request_buffer->num_requests to 0 */
}
@@ -1076,7 +1085,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
struct hv_24x7_data_result_buffer *result_buffer)
{
- unsigned long ret;
+ long ret;
/*
* NOTE: Due to variable number of array elements in request and
@@ -1087,10 +1096,19 @@ static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);
- if (ret)
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret) {
+ struct hv_24x7_request *req;
+
+ req = request_buffer->requests;
+ pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
+ req->performance_domain, req->data_offset,
+ req->starting_ix, req->starting_lpar_ix,
+ ret, ret, result_buffer->detailed_rc,
+ result_buffer->failing_request_ix);
+ return -EIO;
+ }
- return ret;
+ return 0;
}
/*
@@ -1105,9 +1123,11 @@ static int add_event_to_24x7_request(struct perf_event *event,
{
u16 idx;
int i;
+ size_t req_size;
struct hv_24x7_request *req;
- if (request_buffer->num_requests > 254) {
+ if (request_buffer->num_requests >=
+ max_num_requests(request_buffer->interface_version)) {
pr_devel("Too many requests for 24x7 HCALL %d\n",
request_buffer->num_requests);
return -EINVAL;
@@ -1124,23 +1144,113 @@ static int add_event_to_24x7_request(struct perf_event *event,
idx = event_get_vcpu(event);
}
+ req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);
+
i = request_buffer->num_requests++;
- req = &request_buffer->requests[i];
+ req = (void *) request_buffer->requests + i * req_size;
req->performance_domain = event_get_domain(event);
req->data_size = cpu_to_be16(8);
req->data_offset = cpu_to_be32(event_get_offset(event));
- req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)),
+ req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
req->max_num_lpars = cpu_to_be16(1);
req->starting_ix = cpu_to_be16(idx);
req->max_ix = cpu_to_be16(1);
+ if (request_buffer->interface_version > 1) {
+ if (domain_needs_aggregation(req->performance_domain))
+ req->max_num_thread_groups = -1;
+ else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
+ req->starting_thread_group_ix = idx % 2;
+ req->max_num_thread_groups = 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * get_count_from_result - get event count from all result elements in result
+ *
+ * If the event corresponding to this result needs aggregation of the result
+ * element values, then this function does that.
+ *
+ * @event: Event associated with @res.
+ * @resb: Result buffer containing @res.
+ * @res: Result to work on.
+ * @countp: Output variable containing the event count.
+ * @next: Optional output variable pointing to the next result in @resb.
+ */
+static int get_count_from_result(struct perf_event *event,
+ struct hv_24x7_data_result_buffer *resb,
+ struct hv_24x7_result *res, u64 *countp,
+ struct hv_24x7_result **next)
+{
+ u16 num_elements = be16_to_cpu(res->num_elements_returned);
+ u16 data_size = be16_to_cpu(res->result_element_data_size);
+ unsigned int data_offset;
+ void *element_data;
+ int i;
+ u64 count;
+
+ /*
+ * We can bail out early if the result is empty.
+ */
+ if (!num_elements) {
+ pr_debug("Result of request %hhu is empty, nothing to do\n",
+ res->result_ix);
+
+ if (next)
+ *next = (struct hv_24x7_result *) res->elements;
+
+ return -ENODATA;
+ }
+
+ /*
+ * Since we always specify 1 as the maximum for the smallest resource
+ * we're requesting, there should to be only one element per result.
+ * Except when an event needs aggregation, in which case there are more.
+ */
+ if (num_elements != 1 &&
+ !domain_needs_aggregation(event_get_domain(event))) {
+ pr_err("Error: result of request %hhu has %hu elements\n",
+ res->result_ix, num_elements);
+
+ return -EIO;
+ }
+
+ if (data_size != sizeof(u64)) {
+ pr_debug("Error: result of request %hhu has data of %hu bytes\n",
+ res->result_ix, data_size);
+
+ return -ENOTSUPP;
+ }
+
+ if (resb->interface_version == 1)
+ data_offset = offsetof(struct hv_24x7_result_element_v1,
+ element_data);
+ else
+ data_offset = offsetof(struct hv_24x7_result_element_v2,
+ element_data);
+
+ /* Go through the result elements in the result. */
+ for (i = count = 0, element_data = res->elements + data_offset;
+ i < num_elements;
+ i++, element_data += data_size + data_offset)
+ count += be64_to_cpu(*((u64 *) element_data));
+
+ *countp = count;
+
+ /* The next result is after the last result element. */
+ if (next)
+ *next = element_data - data_offset;
+
return 0;
}
-static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
+static int single_24x7_request(struct perf_event *event, u64 *count)
{
- unsigned long ret;
+ int ret;
struct hv_24x7_request_buffer *request_buffer;
struct hv_24x7_data_result_buffer *result_buffer;
@@ -1157,13 +1267,12 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
goto out;
ret = make_24x7_request(request_buffer, result_buffer);
- if (ret) {
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret)
goto out;
- }
/* process result from hcall */
- *count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);
+ ret = get_count_from_result(event, result_buffer,
+ result_buffer->results, count, NULL);
out:
put_cpu_var(hv_24x7_reqb);
@@ -1216,9 +1325,8 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}
- /* Domains above 6 are invalid */
domain = event_get_domain(event);
- if (domain > 6) {
+ if (domain >= HV_PERF_DOMAIN_MAX) {
pr_devel("invalid domain %d\n", domain);
return -EINVAL;
}
@@ -1250,10 +1358,9 @@ static int h_24x7_event_init(struct perf_event *event)
static u64 h_24x7_get_value(struct perf_event *event)
{
- unsigned long ret;
u64 ct;
- ret = single_24x7_request(event, &ct);
- if (ret)
+
+ if (single_24x7_request(event, &ct))
/* We checked this in event init, shouldn't fail here... */
return 0;
@@ -1396,8 +1503,7 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
{
struct hv_24x7_request_buffer *request_buffer;
struct hv_24x7_data_result_buffer *result_buffer;
- struct hv_24x7_result *resb;
- struct perf_event *event;
+ struct hv_24x7_result *res, *next_res;
u64 count;
int i, ret, txn_flags;
struct hv_24x7_hw *h24x7hw;
@@ -1417,19 +1523,21 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
result_buffer = (void *)get_cpu_var(hv_24x7_resb);
ret = make_24x7_request(request_buffer, result_buffer);
- if (ret) {
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret)
goto put_reqb;
- }
h24x7hw = &get_cpu_var(hv_24x7_hw);
- /* Update event counts from hcall */
- for (i = 0; i < request_buffer->num_requests; i++) {
- resb = &result_buffer->results[i];
- count = be64_to_cpu(resb->elements[0].element_data[0]);
- event = h24x7hw->events[i];
- h24x7hw->events[i] = NULL;
+ /* Go through results in the result buffer to update event counts. */
+ for (i = 0, res = result_buffer->results;
+ i < result_buffer->num_results; i++, res = next_res) {
+ struct perf_event *event = h24x7hw->events[res->result_ix];
+
+ ret = get_count_from_result(event, result_buffer, res, &count,
+ &next_res);
+ if (ret)
+ break;
+
update_event_count(event, count);
}
@@ -1480,6 +1588,18 @@ static int hv_24x7_init(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
pr_debug("not a virtualized system, not enabling\n");
return -ENODEV;
+ } else if (!cur_cpu_spec->oprofile_cpu_type)
+ return -ENODEV;
+
+ /* POWER8 only supports v1, while POWER9 only supports v2. */
+ if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
+ interface_version = 1;
+ else {
+ interface_version = 2;
+
+ /* SMT8 in POWER9 needs to aggregate result elements. */
+ if (threads_per_core == 8)
+ aggregate_result_elements = true;
}
hret = hv_perf_caps_get(&caps);
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 634ef4082cdc..5092c4a222a6 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -10,6 +10,8 @@ enum hv_perf_domains {
HV_PERF_DOMAIN_MAX,
};
+#define H24x7_REQUEST_SIZE(iface_version) (iface_version == 1 ? 16 : 32)
+
struct hv_24x7_request {
/* PHYSICAL domains require enabling via phyp/hmc. */
__u8 performance_domain;
@@ -42,19 +44,27 @@ struct hv_24x7_request {
/* chip, core, or virtual processor based on @performance_domain */
__be16 starting_ix;
__be16 max_ix;
+
+ /* The following fields were added in v2 of the 24x7 interface. */
+
+ __u8 starting_thread_group_ix;
+
+ /* -1 means all thread groups starting at @starting_thread_group_ix */
+ __u8 max_num_thread_groups;
+
+ __u8 reserved2[0xE];
} __packed;
struct hv_24x7_request_buffer {
/* 0 - ? */
/* 1 - ? */
-#define HV_24X7_IF_VERSION_CURRENT 0x01
__u8 interface_version;
__u8 num_requests;
__u8 reserved[0xE];
- struct hv_24x7_request requests[1];
+ struct hv_24x7_request requests[];
} __packed;
-struct hv_24x7_result_element {
+struct hv_24x7_result_element_v1 {
__be16 lpar_ix;
/*
@@ -67,10 +77,38 @@ struct hv_24x7_result_element {
__be32 lpar_cfg_instance_id;
/* size = @result_element_data_size of containing result. */
- __u64 element_data[1];
+ __u64 element_data[];
+} __packed;
+
+/*
+ * We need a separate struct for v2 because the offset of @element_data changed
+ * between versions.
+ */
+struct hv_24x7_result_element_v2 {
+ __be16 lpar_ix;
+
+ /*
+ * represents the core, chip, or virtual processor based on the
+ * request's @performance_domain
+ */
+ __be16 domain_ix;
+
+ /* -1 if @performance_domain does not refer to a virtual processor */
+ __be32 lpar_cfg_instance_id;
+
+ __u8 thread_group_ix;
+
+ __u8 reserved[7];
+
+ /* size = @result_element_data_size of containing result. */
+ __u64 element_data[];
} __packed;
struct hv_24x7_result {
+ /*
+ * The index of the 24x7 Request Structure in the 24x7 Request Buffer
+ * used to request this result.
+ */
__u8 result_ix;
/*
@@ -81,14 +119,25 @@ struct hv_24x7_result {
__u8 results_complete;
__be16 num_elements_returned;
- /* This is a copy of @data_size from the corresponding hv_24x7_request */
+ /*
+ * This is a copy of @data_size from the corresponding hv_24x7_request
+ *
+ * Warning: to obtain the size of each element in @elements you have
+ * to add the size of the other members of the result_element struct.
+ */
__be16 result_element_data_size;
__u8 reserved[0x2];
- /* WARNING: only valid for first result element due to variable sizes
- * of result elements */
- /* struct hv_24x7_result_element[@num_elements_returned] */
- struct hv_24x7_result_element elements[1];
+ /*
+ * Either
+ * struct hv_24x7_result_element_v1[@num_elements_returned]
+ * or
+ * struct hv_24x7_result_element_v2[@num_elements_returned]
+ *
+ * depending on the interface_version field of the
+ * struct hv_24x7_data_result_buffer containing this result.
+ */
+ char elements[];
} __packed;
struct hv_24x7_data_result_buffer {
@@ -104,7 +153,7 @@ struct hv_24x7_data_result_buffer {
__u8 reserved2[0x8];
/* WARNING: only valid for the first result due to variable sizes of
* results */
- struct hv_24x7_result results[1]; /* [@num_results] */
+ struct hv_24x7_result results[]; /* [@num_results] */
} __packed;
#endif
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 71a6bfee5c02..80204e064362 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -16,7 +16,7 @@ EVENT(PM_CYC, 0x0001e)
EVENT(PM_ICT_NOSLOT_CYC, 0x100f8)
EVENT(PM_CMPLU_STALL, 0x1e054)
EVENT(PM_INST_CMPL, 0x00002)
-EVENT(PM_BRU_CMPL, 0x10012)
+EVENT(PM_BRU_CMPL, 0x4d05e)
EVENT(PM_BR_MPRED_CMPL, 0x400f6)
/* All L1 D cache load references counted at finish, gated by reject */
@@ -56,3 +56,5 @@ EVENT(PM_RUN_CYC, 0x600f4)
/* Instruction Dispatched */
EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2)
+/* Alternate Branch event code */
+EVENT(PM_BR_CMPL_ALT, 0x10012)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index bb28e1a41257..f17435e4a489 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -231,7 +231,7 @@ static int power9_generic_events_dd1[] = {
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
[PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_DISP,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL_ALT,
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
[PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
[PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN,
@@ -453,6 +453,12 @@ static int __init init_power9_pmu(void)
* sampling scenarios in power9 DD1, instead use PM_INST_DISP.
*/
EVENT_VAR(PM_INST_CMPL, _g).id = PM_INST_DISP;
+ /*
+ * Power9 DD1 should use PM_BR_CMPL_ALT event code for
+ * "branches" to provide correct counter value.
+ */
+ EVENT_VAR(PM_BRU_CMPL, _g).id = PM_BR_CMPL_ALT;
+ EVENT_VAR(PM_BRU_CMPL, _c).id = PM_BR_CMPL_ALT;
rc = register_power_pmu(&power9_isa207_pmu);
} else {
rc = register_power_pmu(&power9_pmu);