1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
|
/*
* Linux performance counter support for ARC
*
* Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H
/* real maximum varies per CPU, this is the maximum supported by the driver */
#define ARC_PMU_MAX_HWEVENTS 64
#define ARC_REG_CC_BUILD 0xF6
#define ARC_REG_CC_INDEX 0x240
#define ARC_REG_CC_NAME0 0x241
#define ARC_REG_CC_NAME1 0x242
#define ARC_REG_PCT_BUILD 0xF5
#define ARC_REG_PCT_COUNTL 0x250
#define ARC_REG_PCT_COUNTH 0x251
#define ARC_REG_PCT_SNAPL 0x252
#define ARC_REG_PCT_SNAPH 0x253
#define ARC_REG_PCT_CONFIG 0x254
#define ARC_REG_PCT_CONTROL 0x255
#define ARC_REG_PCT_INDEX 0x256
#define ARC_REG_PCT_CONTROL_CC (1 << 16) /* clear counts */
#define ARC_REG_PCT_CONTROL_SN (1 << 17) /* snapshot */
struct arc_reg_pct_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int m:8, c:8, r:6, s:2, v:8;
#else
unsigned int v:8, s:2, r:6, c:8, m:8;
#endif
};
struct arc_reg_cc_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int c:16, r:8, v:8;
#else
unsigned int v:8, r:8, c:16;
#endif
};
#define PERF_COUNT_ARC_DCLM (PERF_COUNT_HW_MAX + 0)
#define PERF_COUNT_ARC_DCSM (PERF_COUNT_HW_MAX + 1)
#define PERF_COUNT_ARC_ICM (PERF_COUNT_HW_MAX + 2)
#define PERF_COUNT_ARC_BPOK (PERF_COUNT_HW_MAX + 3)
#define PERF_COUNT_ARC_EDTLB (PERF_COUNT_HW_MAX + 4)
#define PERF_COUNT_ARC_EITLB (PERF_COUNT_HW_MAX + 5)
#define PERF_COUNT_ARC_LDC (PERF_COUNT_HW_MAX + 6)
#define PERF_COUNT_ARC_STC (PERF_COUNT_HW_MAX + 7)
#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 8)
/*
* Some ARC pct quirks:
*
* PERF_COUNT_HW_STALLED_CYCLES_BACKEND
* PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
* The ARC 700 can either measure stalls per pipeline stage, or all stalls
* combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
* and all pipeline flushes (e.g. caused by mispredicts, etc.) to
* STALLED_CYCLES_FRONTEND.
*
* We could start multiple performance counters and combine everything
* afterwards, but that makes it complicated.
*
* Note that I$ cache misses aren't counted by either of the two!
*/
/*
* ARC PCT has hardware conditions with fixed "names" but variable "indexes"
* (based on a specific RTL build)
* Below is the static map between perf generic/arc specific event_id and
* h/w condition names.
* At the time of probe, we loop thru each index and find it's name to
* complete the mapping of perf event_id to h/w index as latter is needed
* to program the counter really
*/
static const char * const arc_pmu_ev_hw_map[] = {
/* count cycles */
[PERF_COUNT_HW_CPU_CYCLES] = "crun",
[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
[PERF_COUNT_HW_BUS_CYCLES] = "crun",
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
/* counts condition */
[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
[PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
[PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */
[PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */
[PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */
[PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */
[PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */
[PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */
[PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */
};
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0xffff
static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
[C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC,
[C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS,
[C(RESULT_MISS)] = PERF_COUNT_ARC_ICM,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC,
[C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB,
},
/* DTLB LD/ST Miss not segregated by h/w*/
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
[C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
#endif /* __ASM_PERF_EVENT_H */
|