Skip to content

Commit a64e697

Browse files
athira-rajeev authored and mpe committed
powerpc/perf: power10 Performance Monitoring support
Base enablement patch to register performance monitoring hardware support for power10. Patch introduce the raw event encoding format, defines the supported list of events, config fields for the event attributes and their corresponding bit values which are exported via sysfs. Patch also enhances the support function in isa207_common.c to include power10 pmu hardware. Reported-by: kernel test robot <[email protected]> Signed-off-by: Madhavan Srinivasan <[email protected]> Signed-off-by: Athira Rajeev <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 9908c82 commit a64e697

File tree

7 files changed

+566
-11
lines changed

7 files changed

+566
-11
lines changed

arch/powerpc/perf/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
99
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
1010
power5+-pmu.o power6-pmu.o power7-pmu.o \
1111
isa207-common.o power8-pmu.o power9-pmu.o \
12-
generic-compat-pmu.o
12+
generic-compat-pmu.o power10-pmu.o
1313
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
1414

1515
obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o

arch/powerpc/perf/core-book3s.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2333,6 +2333,8 @@ static int __init init_ppc64_pmu(void)
23332333
return 0;
23342334
else if (!init_power9_pmu())
23352335
return 0;
2336+
else if (!init_power10_pmu())
2337+
return 0;
23362338
else if (!init_ppc970_pmu())
23372339
return 0;
23382340
else

arch/powerpc/perf/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ extern int init_power6_pmu(void);
99
extern int init_power7_pmu(void);
1010
extern int init_power8_pmu(void);
1111
extern int init_power9_pmu(void);
12+
extern int init_power10_pmu(void);
1213
extern int init_generic_compat_pmu(void);

arch/powerpc/perf/isa207-common.c

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ static bool is_event_valid(u64 event)
5555
{
5656
u64 valid_mask = EVENT_VALID_MASK;
5757

58-
if (cpu_has_feature(CPU_FTR_ARCH_300))
58+
if (cpu_has_feature(CPU_FTR_ARCH_31))
59+
valid_mask = p10_EVENT_VALID_MASK;
60+
else if (cpu_has_feature(CPU_FTR_ARCH_300))
5961
valid_mask = p9_EVENT_VALID_MASK;
6062

6163
return !(event & ~valid_mask);
@@ -69,6 +71,14 @@ static inline bool is_event_marked(u64 event)
6971
return false;
7072
}
7173

74+
static unsigned long sdar_mod_val(u64 event)
75+
{
76+
if (cpu_has_feature(CPU_FTR_ARCH_31))
77+
return p10_SDAR_MODE(event);
78+
79+
return p9_SDAR_MODE(event);
80+
}
81+
7282
static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
7383
{
7484
/*
@@ -79,7 +89,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
7989
* MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling
8090
* mode and will be un-changed when setting MMCRA[63] (Marked events).
8191
*
82-
* Incase of Power9:
92+
* Incase of Power9/power10:
8393
* Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
8494
* or if group already have any marked events.
8595
* For rest
@@ -90,8 +100,8 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
90100
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
91101
if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
92102
*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
93-
else if (p9_SDAR_MODE(event))
94-
*mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
103+
else if (sdar_mod_val(event))
104+
*mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT;
95105
else
96106
*mmcra |= MMCRA_SDAR_MODE_DCACHE;
97107
} else
@@ -134,7 +144,11 @@ static bool is_thresh_cmp_valid(u64 event)
134144
/*
135145
* Check the mantissa upper two bits are not zero, unless the
136146
* exponent is also zero. See the THRESH_CMP_MANTISSA doc.
147+
* Power10: thresh_cmp is replaced by l2_l3 event select.
137148
*/
149+
if (cpu_has_feature(CPU_FTR_ARCH_31))
150+
return false;
151+
138152
cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
139153
exp = cmp >> 7;
140154

@@ -251,7 +265,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
251265

252266
pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
253267
unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
254-
cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK;
268+
if (cpu_has_feature(CPU_FTR_ARCH_31))
269+
cache = (event >> EVENT_CACHE_SEL_SHIFT) &
270+
p10_EVENT_CACHE_SEL_MASK;
271+
else
272+
cache = (event >> EVENT_CACHE_SEL_SHIFT) &
273+
EVENT_CACHE_SEL_MASK;
255274
ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK;
256275

257276
if (pmc) {
@@ -283,7 +302,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
283302
}
284303

285304
if (unit >= 6 && unit <= 9) {
286-
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
305+
if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
306+
mask |= CNST_L2L3_GROUP_MASK;
307+
value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
308+
} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
287309
mask |= CNST_CACHE_GROUP_MASK;
288310
value |= CNST_CACHE_GROUP_VAL(event & 0xff);
289311

@@ -367,6 +389,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
367389
struct perf_event *pevents[])
368390
{
369391
unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
392+
unsigned long mmcr3;
370393
unsigned int pmc, pmc_inuse;
371394
int i;
372395

@@ -379,7 +402,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
379402
pmc_inuse |= 1 << pmc;
380403
}
381404

382-
mmcra = mmcr1 = mmcr2 = 0;
405+
mmcra = mmcr1 = mmcr2 = mmcr3 = 0;
383406

384407
/* Second pass: assign PMCs, set all MMCR1 fields */
385408
for (i = 0; i < n_ev; ++i) {
@@ -438,8 +461,17 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
438461
mmcra |= val << MMCRA_THR_CTL_SHIFT;
439462
val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
440463
mmcra |= val << MMCRA_THR_SEL_SHIFT;
441-
val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
442-
mmcra |= thresh_cmp_val(val);
464+
if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
465+
val = (event[i] >> EVENT_THR_CMP_SHIFT) &
466+
EVENT_THR_CMP_MASK;
467+
mmcra |= thresh_cmp_val(val);
468+
}
469+
}
470+
471+
if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
472+
val = (event[i] >> p10_L2L3_EVENT_SHIFT) &
473+
p10_EVENT_L2L3_SEL_MASK;
474+
mmcr2 |= val << p10_L2L3_SEL_SHIFT;
443475
}
444476

445477
if (event[i] & EVENT_WANTS_BHRB) {
@@ -460,6 +492,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
460492
mmcr2 |= MMCR2_FCS(pmc);
461493
}
462494

495+
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
496+
if (pmc <= 4) {
497+
val = (event[i] >> p10_EVENT_MMCR3_SHIFT) &
498+
p10_EVENT_MMCR3_MASK;
499+
mmcr3 |= val << MMCR3_SHIFT(pmc);
500+
}
501+
}
502+
463503
hwc[i] = pmc - 1;
464504
}
465505

@@ -480,6 +520,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
480520
mmcr->mmcr1 = mmcr1;
481521
mmcr->mmcra = mmcra;
482522
mmcr->mmcr2 = mmcr2;
523+
mmcr->mmcr3 = mmcr3;
483524

484525
return 0;
485526
}

arch/powerpc/perf/isa207-common.h

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,31 @@
8787
EVENT_LINUX_MASK | \
8888
EVENT_PSEL_MASK))
8989

90+
/* Contants to support power10 raw encoding format */
91+
#define p10_SDAR_MODE_SHIFT 22
92+
#define p10_SDAR_MODE_MASK 0x3ull
93+
#define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \
94+
p10_SDAR_MODE_MASK)
95+
#define p10_EVENT_L2L3_SEL_MASK 0x1f
96+
#define p10_L2L3_SEL_SHIFT 3
97+
#define p10_L2L3_EVENT_SHIFT 40
98+
#define p10_EVENT_THRESH_MASK 0xffffull
99+
#define p10_EVENT_CACHE_SEL_MASK 0x3ull
100+
#define p10_EVENT_MMCR3_MASK 0x7fffull
101+
#define p10_EVENT_MMCR3_SHIFT 45
102+
103+
#define p10_EVENT_VALID_MASK \
104+
((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \
105+
(p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
106+
(EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
107+
(p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
108+
(EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
109+
(EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
110+
(p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \
111+
(p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \
112+
(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
113+
EVENT_LINUX_MASK | \
114+
EVENT_PSEL_MASK))
90115
/*
91116
* Layout of constraint bits:
92117
*
@@ -135,6 +160,9 @@
135160
#define CNST_CACHE_PMC4_VAL (1ull << 54)
136161
#define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL
137162

163+
#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55)
164+
#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f)
165+
138166
/*
139167
* For NC we are counting up to 4 events. This requires three bits, and we need
140168
* the fifth event to overflow and set the 4th bit. To achieve that we bias the
@@ -191,7 +219,7 @@
191219
#define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
192220
MMCRA_THR_CTR_EXP_MASK)
193221

194-
/* MMCR1 Threshold Compare bit constant for power9 */
222+
/* MMCRA Threshold Compare bit constant for power9 */
195223
#define p9_MMCRA_THR_CMP_SHIFT 45
196224

197225
/* Bits in MMCR2 for PowerISA v2.07 */
@@ -202,6 +230,9 @@
202230
#define MAX_ALT 2
203231
#define MAX_PMU_COUNTERS 6
204232

233+
/* Bits in MMCR3 for PowerISA v3.10 */
234+
#define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1)))
235+
205236
#define ISA207_SIER_TYPE_SHIFT 15
206237
#define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT)
207238

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later */
2+
/*
3+
* Performance counter support for POWER10 processors.
4+
*
5+
* Copyright 2020 Madhavan Srinivasan, IBM Corporation.
6+
* Copyright 2020 Athira Rajeev, IBM Corporation.
7+
*/
8+
9+
/*
10+
* Power10 event codes.
11+
*/
12+
EVENT(PM_RUN_CYC, 0x600f4);
13+
EVENT(PM_DISP_STALL_CYC, 0x100f8);
14+
EVENT(PM_EXEC_STALL, 0x30008);
15+
EVENT(PM_RUN_INST_CMPL, 0x500fa);
16+
EVENT(PM_BR_CMPL, 0x4d05e);
17+
EVENT(PM_BR_MPRED_CMPL, 0x400f6);
18+
19+
/* All L1 D cache load references counted at finish, gated by reject */
20+
EVENT(PM_LD_REF_L1, 0x100fc);
21+
/* Load Missed L1 */
22+
EVENT(PM_LD_MISS_L1, 0x3e054);
23+
/* Store Missed L1 */
24+
EVENT(PM_ST_MISS_L1, 0x300f0);
25+
/* L1 cache data prefetches */
26+
EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c);
27+
/* Demand iCache Miss */
28+
EVENT(PM_L1_ICACHE_MISS, 0x200fc);
29+
/* Instruction fetches from L1 */
30+
EVENT(PM_INST_FROM_L1, 0x04080);
31+
/* Instruction Demand sectors wriittent into IL1 */
32+
EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040);
33+
/* Instruction prefetch written into IL1 */
34+
EVENT(PM_IC_PREF_REQ, 0x040a0);
35+
/* The data cache was reloaded from local core's L3 due to a demand load */
36+
EVENT(PM_DATA_FROM_L3, 0x01340000001c040);
37+
/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
38+
EVENT(PM_DATA_FROM_L3MISS, 0x300fe);
39+
/* Data PTEG reload */
40+
EVENT(PM_DTLB_MISS, 0x300fc);
41+
/* ITLB Reloaded */
42+
EVENT(PM_ITLB_MISS, 0x400fc);
43+
44+
EVENT(PM_RUN_CYC_ALT, 0x0001e);
45+
EVENT(PM_RUN_INST_CMPL_ALT, 0x00002);
46+
47+
/*
48+
* Memory Access Events
49+
*
50+
* Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
51+
* To enable capturing of memory profiling, these MMCRA bits
52+
* needs to be programmed and corresponding raw event format
53+
* encoding.
54+
*
55+
* MMCRA bits encoding needed are
56+
* SM (Sampling Mode)
57+
* EM (Eligibility for Random Sampling)
58+
* TECE (Threshold Event Counter Event)
59+
* TS (Threshold Start Event)
60+
* TE (Threshold End Event)
61+
*
62+
* Corresponding Raw Encoding bits:
63+
* sample [EM,SM]
64+
* thresh_sel (TECE)
65+
* thresh start (TS)
66+
* thresh end (TE)
67+
*/
68+
69+
EVENT(MEM_LOADS, 0x34340401e0);
70+
EVENT(MEM_STORES, 0x343c0401e0);

0 commit comments

Comments (0)