Skip to content

Commit 3a146d5

Browse files
committed
AMDGPU: Partially re-add scheduling test for gfx940
31295bb reverted the original patch. Submit the part of the test that happens not to hit the sanitizer error, which covers the instructions I most need test coverage for.
1 parent a282109 commit 3a146d5

File tree

1 file changed

+162
-0
lines changed

1 file changed

+162
-0
lines changed
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx940 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s
2+
3+
# CHECK: Iterations: 1
4+
# CHECK: Instructions: 58
5+
# CHECK: Total Cycles: 543
6+
# CHECK: Total uOps: 58
7+
8+
# CHECK: Resources:
9+
# CHECK: [0] - HWBranch
10+
# CHECK: [1] - HWExport
11+
# CHECK: [2] - HWLGKM
12+
# CHECK: [3] - HWSALU
13+
# CHECK: [4] - HWVALU
14+
# CHECK: [5] - HWVMEM
15+
# CHECK: [6] - HWXDL
16+
17+
v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
18+
v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
19+
20+
v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
21+
v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
22+
23+
v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
24+
v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3]
25+
26+
v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
27+
v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7]
28+
29+
v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3]
30+
v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3]
31+
32+
v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15]
33+
v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15]
34+
35+
v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3]
36+
v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3]
37+
38+
v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15]
39+
v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15]
40+
41+
v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3]
42+
v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3]
43+
44+
v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15]
45+
v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15]
46+
47+
v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5]
48+
v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5]
49+
50+
v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33]
51+
v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33]
52+
53+
v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65]
54+
v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65]
55+
56+
v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5]
57+
v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5]
58+
59+
v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33]
60+
v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33]
61+
62+
v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65]
63+
v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65]
64+
65+
v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
66+
v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5]
67+
68+
v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33]
69+
v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]
70+
71+
v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
72+
v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
73+
74+
v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7
75+
v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7
76+
77+
v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
78+
v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
79+
80+
v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5]
81+
v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5]
82+
83+
v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33]
84+
v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33]
85+
86+
v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65]
87+
v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65]
88+
89+
v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1
90+
v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1
91+
92+
v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1
93+
v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3
94+
95+
v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1
96+
v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5
97+
98+
v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1
99+
v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9
100+
101+
v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
102+
v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
103+
104+
# CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions:
105+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
106+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
107+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
108+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
109+
# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
110+
# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3]
111+
# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
112+
# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7]
113+
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3]
114+
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3]
115+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15]
116+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15]
117+
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3]
118+
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3]
119+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15]
120+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15]
121+
# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3]
122+
# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3]
123+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15]
124+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15]
125+
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5]
126+
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5]
127+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33]
128+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33]
129+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65]
130+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65]
131+
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5]
132+
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5]
133+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33]
134+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33]
135+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65]
136+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65]
137+
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
138+
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5]
139+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33]
140+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]
141+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
142+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
143+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7
144+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7
145+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
146+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
147+
# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5]
148+
# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5]
149+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33]
150+
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33]
151+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65]
152+
# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65]
153+
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1
154+
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1
155+
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1
156+
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3
157+
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1
158+
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5
159+
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1
160+
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9
161+
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
162+
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11

0 commit comments

Comments
 (0)