Skip to content

Commit 9bdfee2

Browse files
author
Evandro Menezes
committed
[AArch64] Add the pipeline model for Exynos M5
Add the scheduling and cost models for Exynos M5.
1 parent 25f33d8 commit 9bdfee2

30 files changed: 3349 additions and 35 deletions.

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,7 @@ include "AArch64SchedFalkor.td"
450450
include "AArch64SchedKryo.td"
451451
include "AArch64SchedExynosM3.td"
452452
include "AArch64SchedExynosM4.td"
453+
include "AArch64SchedExynosM5.td"
453454
include "AArch64SchedThunderX.td"
454455
include "AArch64SchedThunderX2T99.td"
455456

@@ -790,7 +791,7 @@ def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
790791
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
791792
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
792793
def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
793-
def : ProcessorModel<"exynos-m5", ExynosM4Model, [ProcExynosM4]>;
794+
def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
794795
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
795796
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
796797
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;

llvm/lib/Target/AArch64/AArch64SchedExynosM5.td

Lines changed: 1012 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
3+
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
4+
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
5+
6+
aese v0.16b, v1.16b
7+
aesmc v0.16b, v0.16b
8+
9+
aesd v0.16b, v1.16b
10+
aesimc v0.16b, v0.16b
11+
12+
# ALL: Iterations: 100
13+
# ALL-NEXT: Instructions: 400
14+
15+
# M3-NEXT: Total Cycles: 203
16+
# M4-NEXT: Total Cycles: 203
17+
# M5-NEXT: Total Cycles: 403
18+
19+
# ALL-NEXT: Total uOps: 400
20+
21+
# ALL: Dispatch Width: 6
22+
23+
# M3-NEXT: uOps Per Cycle: 1.97
24+
# M3-NEXT: IPC: 1.97
25+
26+
# M4-NEXT: uOps Per Cycle: 1.97
27+
# M4-NEXT: IPC: 1.97
28+
29+
# M5-NEXT: uOps Per Cycle: 0.99
30+
# M5-NEXT: IPC: 0.99
31+
32+
# ALL-NEXT: Block RThroughput: 2.0
33+
34+
# ALL: Instruction Info:
35+
# ALL-NEXT: [1]: #uOps
36+
# ALL-NEXT: [2]: Latency
37+
# ALL-NEXT: [3]: RThroughput
38+
# ALL-NEXT: [4]: MayLoad
39+
# ALL-NEXT: [5]: MayStore
40+
# ALL-NEXT: [6]: HasSideEffects (U)
41+
42+
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
43+
44+
# M3-NEXT: 1 1 0.50 aese v0.16b, v1.16b
45+
# M3-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b
46+
# M3-NEXT: 1 1 0.50 aesd v0.16b, v1.16b
47+
# M3-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b
48+
49+
# M4-NEXT: 1 1 0.50 aese v0.16b, v1.16b
50+
# M4-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b
51+
# M4-NEXT: 1 1 0.50 aesd v0.16b, v1.16b
52+
# M4-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b
53+
54+
# M5-NEXT: 1 2 0.50 aese v0.16b, v1.16b
55+
# M5-NEXT: 1 2 0.50 aesmc v0.16b, v0.16b
56+
# M5-NEXT: 1 2 0.50 aesd v0.16b, v1.16b
57+
# M5-NEXT: 1 2 0.50 aesimc v0.16b, v0.16b
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
3+
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
4+
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
5+
6+
ld1 {v0.s}[0], [sp]
7+
ld1r {v0.2s}, [sp]
8+
ld1 {v0.2s}, [sp]
9+
ld1 {v0.2s, v1.2s}, [sp]
10+
ld1 {v0.2s, v1.2s, v2.2s}, [sp]
11+
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
12+
13+
ld1 {v0.d}[0], [sp]
14+
ld1r {v0.2d}, [sp]
15+
ld1 {v0.2d}, [sp]
16+
ld1 {v0.2d, v1.2d}, [sp]
17+
ld1 {v0.2d, v1.2d, v2.2d}, [sp]
18+
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
19+
20+
ld1 {v0.s}[0], [sp], #4
21+
ld1r {v0.2s}, [sp], #4
22+
ld1 {v0.2s}, [sp], #8
23+
ld1 {v0.2s, v1.2s}, [sp], #16
24+
ld1 {v0.2s, v1.2s, v2.2s}, [sp], #24
25+
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
26+
27+
ld1 {v0.d}[0], [sp], #8
28+
ld1r {v0.2d}, [sp], #8
29+
ld1 {v0.2d}, [sp], #16
30+
ld1 {v0.2d, v1.2d}, [sp], #32
31+
ld1 {v0.2d, v1.2d, v2.2d}, [sp], #48
32+
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
33+
34+
ld1 {v0.s}[0], [sp], x0
35+
ld1r {v0.2s}, [sp], x0
36+
ld1 {v0.2s}, [sp], x0
37+
ld1 {v0.2s, v1.2s}, [sp], x0
38+
ld1 {v0.2s, v1.2s, v2.2s}, [sp], x0
39+
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
40+
41+
ld1 {v0.d}[0], [sp], x0
42+
ld1r {v0.2d}, [sp], x0
43+
ld1 {v0.2d}, [sp], x0
44+
ld1 {v0.2d, v1.2d}, [sp], x0
45+
ld1 {v0.2d, v1.2d, v2.2d}, [sp], x0
46+
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
47+
48+
# ALL: Iterations: 100
49+
# ALL-NEXT: Instructions: 3600
50+
51+
# M3-NEXT: Total Cycles: 14903
52+
# M4-NEXT: Total Cycles: 14703
53+
# M5-NEXT: Total Cycles: 17203
54+
55+
# ALL-NEXT: Total uOps: 10200
56+
57+
# ALL: Dispatch Width: 6
58+
59+
# M3-NEXT: uOps Per Cycle: 0.68
60+
# M3-NEXT: IPC: 0.24
61+
62+
# M4-NEXT: uOps Per Cycle: 0.69
63+
# M4-NEXT: IPC: 0.24
64+
65+
# M5-NEXT: uOps Per Cycle: 0.59
66+
# M5-NEXT: IPC: 0.21
67+
68+
# ALL-NEXT: Block RThroughput: 39.0
69+
70+
# ALL: Instruction Info:
71+
# ALL-NEXT: [1]: #uOps
72+
# ALL-NEXT: [2]: Latency
73+
# ALL-NEXT: [3]: RThroughput
74+
# ALL-NEXT: [4]: MayLoad
75+
# ALL-NEXT: [5]: MayStore
76+
# ALL-NEXT: [6]: HasSideEffects (U)
77+
78+
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
79+
80+
# M3-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp]
81+
# M3-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp]
82+
# M3-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp]
83+
# M3-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp]
84+
# M3-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
85+
# M3-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
86+
# M3-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp]
87+
# M3-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp]
88+
# M3-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp]
89+
# M3-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp]
90+
# M3-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
91+
# M3-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
92+
# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4
93+
# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4
94+
# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8
95+
# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
96+
# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
97+
# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
98+
# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8
99+
# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8
100+
# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16
101+
# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
102+
# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
103+
# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
104+
# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0
105+
# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0
106+
# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0
107+
# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
108+
# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
109+
# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
110+
# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0
111+
# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0
112+
# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0
113+
# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
114+
# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
115+
# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
116+
117+
# M4-NEXT: 2 6 1.00 * ld1 { v0.s }[0], [sp]
118+
# M4-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp]
119+
# M4-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp]
120+
# M4-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp]
121+
# M4-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
122+
# M4-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
123+
# M4-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp]
124+
# M4-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp]
125+
# M4-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp]
126+
# M4-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp]
127+
# M4-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
128+
# M4-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
129+
# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], #4
130+
# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4
131+
# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8
132+
# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
133+
# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
134+
# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
135+
# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8
136+
# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8
137+
# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16
138+
# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
139+
# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
140+
# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
141+
# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], x0
142+
# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0
143+
# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0
144+
# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
145+
# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
146+
# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
147+
# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0
148+
# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0
149+
# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0
150+
# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
151+
# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
152+
# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
153+
154+
# M5-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp]
155+
# M5-NEXT: 1 6 0.50 * ld1r { v0.2s }, [sp]
156+
# M5-NEXT: 1 6 0.50 * ld1 { v0.2s }, [sp]
157+
# M5-NEXT: 2 6 1.00 * ld1 { v0.2s, v1.2s }, [sp]
158+
# M5-NEXT: 3 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
159+
# M5-NEXT: 4 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
160+
# M5-NEXT: 2 7 1.00 * ld1 { v0.d }[0], [sp]
161+
# M5-NEXT: 1 6 0.50 * ld1r { v0.2d }, [sp]
162+
# M5-NEXT: 1 6 0.50 * ld1 { v0.2d }, [sp]
163+
# M5-NEXT: 2 6 1.00 * ld1 { v0.2d, v1.2d }, [sp]
164+
# M5-NEXT: 3 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
165+
# M5-NEXT: 4 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
166+
# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4
167+
# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], #4
168+
# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], #8
169+
# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
170+
# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
171+
# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
172+
# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], #8
173+
# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], #8
174+
# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], #16
175+
# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
176+
# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
177+
# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
178+
# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0
179+
# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], x0
180+
# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], x0
181+
# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
182+
# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
183+
# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
184+
# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], x0
185+
# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], x0
186+
# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], x0
187+
# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
188+
# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
189+
# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0

0 commit comments

Comments
 (0)