Skip to content

Commit ecd94f1

Browse files
Kan Liang authored and acmel committed
perf vendor events: Add JSON metrics for Cascadelake server
Add JSON metrics (based on event list v1) for Cascadelake server

Signed-off-by: Kan Liang <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 3b54411 commit ecd94f1

File tree

11 files changed

+31347
-0
lines changed

11 files changed

+31347
-0
lines changed

tools/perf/pmu-events/arch/x86/cascadelakex/cache.json

Lines changed: 10172 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
[
2+
{
3+
"BriefDescription": "Instructions Per Cycle (per logical thread)",
4+
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
5+
"MetricGroup": "TopDownL1",
6+
"MetricName": "IPC"
7+
},
8+
{
9+
"BriefDescription": "Uops Per Instruction",
10+
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
11+
"MetricGroup": "Pipeline",
12+
"MetricName": "UPI"
13+
},
14+
{
15+
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
16+
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
17+
"MetricGroup": "Frontend",
18+
"MetricName": "IFetch_Line_Utilization"
19+
},
20+
{
21+
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
22+
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
23+
"MetricGroup": "DSB;Frontend_Bandwidth",
24+
"MetricName": "DSB_Coverage"
25+
},
26+
{
27+
"BriefDescription": "Cycles Per Instruction (threaded)",
28+
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
29+
"MetricGroup": "Pipeline;Summary",
30+
"MetricName": "CPI"
31+
},
32+
{
33+
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
34+
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
35+
"MetricGroup": "Summary",
36+
"MetricName": "CLKS"
37+
},
38+
{
39+
"BriefDescription": "Total issue-pipeline slots",
40+
"MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
41+
"MetricGroup": "TopDownL1",
42+
"MetricName": "SLOTS"
43+
},
44+
{
45+
"BriefDescription": "Total number of retired Instructions",
46+
"MetricExpr": "INST_RETIRED.ANY",
47+
"MetricGroup": "Summary",
48+
"MetricName": "Instructions"
49+
},
50+
{
51+
"BriefDescription": "Instructions Per Cycle (per physical core)",
52+
"MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
53+
"MetricGroup": "SMT",
54+
"MetricName": "CoreIPC"
55+
},
56+
{
57+
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
58+
"MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
59+
"MetricGroup": "Pipeline;Ports_Utilization",
60+
"MetricName": "ILP"
61+
},
62+
{
63+
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
64+
"MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
65+
"MetricGroup": "Unknown_Branches",
66+
"MetricName": "BAClear_Cost"
67+
},
68+
{
69+
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
70+
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
71+
"MetricGroup": "SMT",
72+
"MetricName": "CORE_CLKS"
73+
},
74+
{
75+
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76+
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
77+
"MetricGroup": "Memory_Bound;Memory_Lat",
78+
"MetricName": "Load_Miss_Real_Latency"
79+
},
80+
{
81+
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least 1 such miss)",
82+
"MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
83+
"MetricGroup": "Memory_Bound;Memory_BW",
84+
"MetricName": "MLP"
85+
},
86+
{
87+
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
88+
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
89+
"MetricGroup": "TLB",
90+
"MetricName": "Page_Walks_Utilization"
91+
},
92+
{
93+
"BriefDescription": "Average CPU Utilization",
94+
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
95+
"MetricGroup": "Summary",
96+
"MetricName": "CPU_Utilization"
97+
},
98+
{
99+
"BriefDescription": "Giga Floating Point Operations Per Second",
100+
"MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8*( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16* FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 / duration_time",
101+
"MetricGroup": "FLOPS;Summary",
102+
"MetricName": "GFLOPs"
103+
},
104+
{
105+
"BriefDescription": "Average Frequency Utilization relative to nominal frequency",
106+
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
107+
"MetricGroup": "Power",
108+
"MetricName": "Turbo_Utilization"
109+
},
110+
{
111+
"BriefDescription": "Fraction of cycles where both hardware threads were active",
112+
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
113+
"MetricGroup": "SMT;Summary",
114+
"MetricName": "SMT_2T_Utilization"
115+
},
116+
{
117+
"BriefDescription": "Fraction of cycles spent in Kernel mode",
118+
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
119+
"MetricGroup": "Summary",
120+
"MetricName": "Kernel_Utilization"
121+
},
122+
{
123+
"BriefDescription": "C3 residency percent per core",
124+
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
125+
"MetricGroup": "Power",
126+
"MetricName": "C3_Core_Residency"
127+
},
128+
{
129+
"BriefDescription": "C6 residency percent per core",
130+
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
131+
"MetricGroup": "Power",
132+
"MetricName": "C6_Core_Residency"
133+
},
134+
{
135+
"BriefDescription": "C7 residency percent per core",
136+
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
137+
"MetricGroup": "Power",
138+
"MetricName": "C7_Core_Residency"
139+
},
140+
{
141+
"BriefDescription": "C2 residency percent per package",
142+
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
143+
"MetricGroup": "Power",
144+
"MetricName": "C2_Pkg_Residency"
145+
},
146+
{
147+
"BriefDescription": "C3 residency percent per package",
148+
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
149+
"MetricGroup": "Power",
150+
"MetricName": "C3_Pkg_Residency"
151+
},
152+
{
153+
"BriefDescription": "C6 residency percent per package",
154+
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
155+
"MetricGroup": "Power",
156+
"MetricName": "C6_Pkg_Residency"
157+
},
158+
{
159+
"BriefDescription": "C7 residency percent per package",
160+
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
161+
"MetricGroup": "Power",
162+
"MetricName": "C7_Pkg_Residency"
163+
}
164+
]
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
[
2+
{
3+
"EventCode": "0xC7",
4+
"UMask": "0x1",
5+
"BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
6+
"Counter": "0,1,2,3",
7+
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
8+
"SampleAfterValue": "2000003",
9+
"CounterHTOff": "0,1,2,3,4,5,6,7"
10+
},
11+
{
12+
"EventCode": "0xC7",
13+
"UMask": "0x2",
14+
"BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
15+
"Counter": "0,1,2,3",
16+
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
17+
"SampleAfterValue": "2000003",
18+
"CounterHTOff": "0,1,2,3,4,5,6,7"
19+
},
20+
{
21+
"EventCode": "0xC7",
22+
"UMask": "0x4",
23+
"BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
24+
"Counter": "0,1,2,3",
25+
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
26+
"SampleAfterValue": "2000003",
27+
"CounterHTOff": "0,1,2,3,4,5,6,7"
28+
},
29+
{
30+
"EventCode": "0xC7",
31+
"UMask": "0x8",
32+
"BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 4 calculations per element.",
33+
"Counter": "0,1,2,3",
34+
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
35+
"SampleAfterValue": "2000003",
36+
"CounterHTOff": "0,1,2,3,4,5,6,7"
37+
},
38+
{
39+
"EventCode": "0xC7",
40+
"UMask": "0x10",
41+
"BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 4 calculations per element.",
42+
"Counter": "0,1,2,3",
43+
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
44+
"SampleAfterValue": "2000003",
45+
"CounterHTOff": "0,1,2,3,4,5,6,7"
46+
},
47+
{
48+
"EventCode": "0xC7",
49+
"UMask": "0x20",
50+
"BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
51+
"Counter": "0,1,2,3",
52+
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
53+
"SampleAfterValue": "2000003",
54+
"CounterHTOff": "0,1,2,3,4,5,6,7"
55+
},
56+
{
57+
"EventCode": "0xC7",
58+
"UMask": "0x40",
59+
"BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
60+
"Counter": "0,1,2,3",
61+
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
62+
"SampleAfterValue": "2000003",
63+
"CounterHTOff": "0,1,2,3,4,5,6,7"
64+
},
65+
{
66+
"EventCode": "0xC7",
67+
"UMask": "0x80",
68+
"BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
69+
"Counter": "0,1,2,3",
70+
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
71+
"SampleAfterValue": "2000003",
72+
"CounterHTOff": "0,1,2,3,4,5,6,7"
73+
},
74+
{
75+
"EventCode": "0xCA",
76+
"UMask": "0x1e",
77+
"BriefDescription": "Cycles with any input/output SSE or FP assist",
78+
"Counter": "0,1,2,3",
79+
"EventName": "FP_ASSIST.ANY",
80+
"CounterMask": "1",
81+
"PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
82+
"SampleAfterValue": "100003",
83+
"CounterHTOff": "0,1,2,3,4,5,6,7"
84+
}
85+
]

0 commit comments

Comments
 (0)