1
1
// REQUIRES: amdgpu-registered-target
2
- // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -emit-llvm -o - %s | FileCheck %s
2
+ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -DMFMA_GFX908_TESTS -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX908
3
+ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -DMFMA_GFX90A_TESTS -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX90A
3
4
4
5
#pragma OPENCL EXTENSION cl_khr_fp64:enable
5
6
@@ -19,143 +20,199 @@ typedef short v32s __attribute__((ext_vector_type(32)));
19
20
typedef double v4d __attribute__((ext_vector_type (4 )));
20
21
21
22
22
- // CHECK-LABEL: @test_mfma_f32_32x32x1f32
23
- // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
23
+ #ifdef MFMA_GFX908_TESTS
24
+
25
+ // CHECK-GFX908-LABEL: @test_mfma_f32_32x32x1f32
26
+ // CHECK-GFX908: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
24
27
void test_mfma_f32_32x32x1f32 (global v32f * out , float a , float b , v32f c )
25
28
{
26
29
* out = __builtin_amdgcn_mfma_f32_32x32x1f32 (a , b , c , 0 , 0 , 0 );
27
30
}
28
31
29
- // CHECK-LABEL: @test_mfma_f32_16x16x1f32
30
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0)
32
+ // CHECK-GFX908-LABEL: @test_mfma_f32_16x16x1f32
33
+ // CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0)
31
34
void test_mfma_f32_16x16x1f32 (global v16f * out , float a , float b , v16f c )
32
35
{
33
36
* out = __builtin_amdgcn_mfma_f32_16x16x1f32 (a , b , c , 0 , 0 , 0 );
34
37
}
35
38
36
- // CHECK-LABEL: @test_mfma_f32_4x4x1f32
37
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
39
+ // CHECK-GFX908-LABEL: @test_mfma_f32_4x4x1f32
40
+ // CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
38
41
void test_mfma_f32_4x4x1f32 (global v4f * out , float a , float b , v4f c )
39
42
{
40
43
* out = __builtin_amdgcn_mfma_f32_4x4x1f32 (a , b , c , 0 , 0 , 0 );
41
44
}
42
45
43
- // CHECK-LABEL: @test_mfma_f32_32x32x2f32
44
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0)
46
+ // CHECK-GFX908-LABEL: @test_mfma_f32_32x32x2f32
47
+ // CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0)
45
48
void test_mfma_f32_32x32x2f32 (global v16f * out , float a , float b , v16f c )
46
49
{
47
50
* out = __builtin_amdgcn_mfma_f32_32x32x2f32 (a , b , c , 0 , 0 , 0 );
48
51
}
49
52
50
- // CHECK-LABEL: @test_mfma_f32_16x16x4f32
51
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
53
+ // CHECK-GFX908-LABEL: @test_mfma_f32_16x16x4f32
54
+ // CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
52
55
void test_mfma_f32_16x16x4f32 (global v4f * out , float a , float b , v4f c )
53
56
{
54
57
* out = __builtin_amdgcn_mfma_f32_16x16x4f32 (a , b , c , 0 , 0 , 0 );
55
58
}
56
59
57
- // CHECK-LABEL: @test_mfma_f32_32x32x4f16
58
- // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %a, <4 x half> %b, <32 x float> %c, i32 0, i32 0, i32 0)
60
+ // CHECK-GFX908-LABEL: @test_mfma_f32_32x32x4f16
61
+ // CHECK-GFX908: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %a, <4 x half> %b, <32 x float> %c, i32 0, i32 0, i32 0)
59
62
void test_mfma_f32_32x32x4f16 (global v32f * out , v4h a , v4h b , v32f c )
60
63
{
61
64
* out = __builtin_amdgcn_mfma_f32_32x32x4f16 (a , b , c , 0 , 0 , 0 );
62
65
}
63
66
64
- // CHECK-LABEL: @test_mfma_f32_16x16x4f16
65
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
67
+ // CHECK-GFX908-LABEL: @test_mfma_f32_16x16x4f16
68
+ // CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
66
69
void test_mfma_f32_16x16x4f16 (global v16f * out , v4h a , v4h b , v16f c )
67
70
{
68
71
* out = __builtin_amdgcn_mfma_f32_16x16x4f16 (a , b , c , 0 , 0 , 0 );
69
72
}
70
73
71
- // CHECK-LABEL: @test_mfma_f32_4x4x4f16
72
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
74
+ // CHECK-GFX908-LABEL: @test_mfma_f32_4x4x4f16
75
+ // CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
73
76
void test_mfma_f32_4x4x4f16 (global v4f * out , v4h a , v4h b , v4f c )
74
77
{
75
78
* out = __builtin_amdgcn_mfma_f32_4x4x4f16 (a , b , c , 0 , 0 , 0 );
76
79
}
77
80
78
- // CHECK-LABEL: @test_mfma_f32_32x32x8f16
79
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
81
+ // CHECK-GFX908-LABEL: @test_mfma_f32_32x32x8f16
82
+ // CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
80
83
void test_mfma_f32_32x32x8f16 (global v16f * out , v4h a , v4h b , v16f c )
81
84
{
82
85
* out = __builtin_amdgcn_mfma_f32_32x32x8f16 (a , b , c , 0 , 0 , 0 );
83
86
}
84
87
85
- // CHECK-LABEL: @test_mfma_f32_16x16x16f16
86
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
88
+ // CHECK-GFX908-LABEL: @test_mfma_f32_16x16x16f16
89
+ // CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
87
90
void test_mfma_f32_16x16x16f16 (global v4f * out , v4h a , v4h b , v4f c )
88
91
{
89
92
* out = __builtin_amdgcn_mfma_f32_16x16x16f16 (a , b , c , 0 , 0 , 0 );
90
93
}
91
94
92
- // CHECK-LABEL: @test_mfma_i32_32x32x4i8
93
- // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %a, i32 %b, <32 x i32> %c, i32 0, i32 0, i32 0)
95
+ // CHECK-GFX908-LABEL: @test_mfma_i32_32x32x4i8
96
+ // CHECK-GFX908: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %a, i32 %b, <32 x i32> %c, i32 0, i32 0, i32 0)
94
97
void test_mfma_i32_32x32x4i8 (global v32i * out , int a , int b , v32i c )
95
98
{
96
99
* out = __builtin_amdgcn_mfma_i32_32x32x4i8 (a , b , c , 0 , 0 , 0 );
97
100
}
98
101
99
- // CHECK-LABEL: @test_mfma_i32_16x16x4i8
100
- // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %a, i32 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
102
+ // CHECK-GFX908-LABEL: @test_mfma_i32_16x16x4i8
103
+ // CHECK-GFX908: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %a, i32 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
101
104
void test_mfma_i32_16x16x4i8 (global v16i * out , int a , int b , v16i c )
102
105
{
103
106
* out = __builtin_amdgcn_mfma_i32_16x16x4i8 (a , b , c , 0 , 0 , 0 );
104
107
}
105
108
106
- // CHECK-LABEL: @test_mfma_i32_4x4x4i8
107
- // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %a, i32 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
109
+ // CHECK-GFX908-LABEL: @test_mfma_i32_4x4x4i8
110
+ // CHECK-GFX908: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %a, i32 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
108
111
void test_mfma_i32_4x4x4i8 (global v4i * out , int a , int b , v4i c )
109
112
{
110
113
* out = __builtin_amdgcn_mfma_i32_4x4x4i8 (a , b , c , 0 , 0 , 0 );
111
114
}
112
115
113
- // CHECK-LABEL: @test_mfma_i32_32x32x8i8
114
- // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %a, i32 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
116
+ // CHECK-GFX908-LABEL: @test_mfma_i32_32x32x8i8
117
+ // CHECK-GFX908: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %a, i32 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
115
118
void test_mfma_i32_32x32x8i8 (global v16i * out , int a , int b , v16i c )
116
119
{
117
120
* out = __builtin_amdgcn_mfma_i32_32x32x8i8 (a , b , c , 0 , 0 , 0 );
118
121
}
119
122
120
- // CHECK-LABEL: @test_mfma_i32_16x16x16i8
121
- // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %a, i32 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
123
+ // CHECK-GFX908-LABEL: @test_mfma_i32_16x16x16i8
124
+ // CHECK-GFX908: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %a, i32 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
122
125
void test_mfma_i32_16x16x16i8 (global v4i * out , int a , int b , v4i c )
123
126
{
124
127
* out = __builtin_amdgcn_mfma_i32_16x16x16i8 (a , b , c , 0 , 0 , 0 );
125
128
}
126
129
127
- // CHECK-LABEL: @test_mfma_f32_32x32x2bf16
128
- // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x float> %c, i32 0, i32 0, i32 0)
130
+ // CHECK-GFX908-LABEL: @test_mfma_f32_32x32x2bf16
131
+ // CHECK-GFX908: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x float> %c, i32 0, i32 0, i32 0)
129
132
void test_mfma_f32_32x32x2bf16 (global v32f * out , v2s a , v2s b , v32f c )
130
133
{
131
134
* out = __builtin_amdgcn_mfma_f32_32x32x2bf16 (a , b , c , 0 , 0 , 0 );
132
135
}
133
136
134
- // CHECK-LABEL: @test_mfma_f32_16x16x2bf16
135
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
137
+ // CHECK-GFX908-LABEL: @test_mfma_f32_16x16x2bf16
138
+ // CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
136
139
void test_mfma_f32_16x16x2bf16 (global v16f * out , v2s a , v2s b , v16f c )
137
140
{
138
141
* out = __builtin_amdgcn_mfma_f32_16x16x2bf16 (a , b , c , 0 , 0 , 0 );
139
142
}
140
143
141
- // CHECK-LABEL: @test_mfma_f32_4x4x2bf16
142
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
144
+ // CHECK-GFX908-LABEL: @test_mfma_f32_4x4x2bf16
145
+ // CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
143
146
void test_mfma_f32_4x4x2bf16 (global v4f * out , v2s a , v2s b , v4f c )
144
147
{
145
148
* out = __builtin_amdgcn_mfma_f32_4x4x2bf16 (a , b , c , 0 , 0 , 0 );
146
149
}
147
150
148
- // CHECK-LABEL: @test_mfma_f32_32x32x4bf16
149
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
151
+ // CHECK-GFX908-LABEL: @test_mfma_f32_32x32x4bf16
152
+ // CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
150
153
void test_mfma_f32_32x32x4bf16 (global v16f * out , v2s a , v2s b , v16f c )
151
154
{
152
155
* out = __builtin_amdgcn_mfma_f32_32x32x4bf16 (a , b , c , 0 , 0 , 0 );
153
156
}
154
157
155
- // CHECK-LABEL: @test_mfma_f32_16x16x8bf16
156
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
158
+ // CHECK-GFX908-LABEL: @test_mfma_f32_16x16x8bf16
159
+ // CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
157
160
void test_mfma_f32_16x16x8bf16 (global v4f * out , v2s a , v2s b , v4f c )
158
161
{
159
162
* out = __builtin_amdgcn_mfma_f32_16x16x8bf16 (a , b , c , 0 , 0 , 0 );
160
163
}
161
164
165
+ #endif // MFMA_GFX908_TESTS
166
+
167
+ #ifdef MFMA_GFX90A_TESTS
168
+
169
+ // CHECK-GFX90A-LABEL: @test_mfma_f32_32x32x4bf16_1k
170
+ // CHECK-GFX90A: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <32 x float> %c, i32 0, i32 0, i32 0)
171
+ void test_mfma_f32_32x32x4bf16_1k (global v32f * out , v4s a , v4s b , v32f c )
172
+ {
173
+ * out = __builtin_amdgcn_mfma_f32_32x32x4bf16_1k (a , b , c , 0 , 0 , 0 );
174
+ }
175
+
176
+ // CHECK-GFX90A-LABEL: @test_mfma_f32_16x16x4bf16_1k
177
+ // CHECK-GFX90A: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
178
+ void test_mfma_f32_16x16x4bf16_1k (global v16f * out , v4s a , v4s b , v16f c )
179
+ {
180
+ * out = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k (a , b , c , 0 , 0 , 0 );
181
+ }
182
+
183
+ // CHECK-GFX90A-LABEL: @test_mfma_f32_4x4x4bf16_1k
184
+ // CHECK-GFX90A: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
185
+ void test_mfma_f32_4x4x4bf16_1k (global v4f * out , v4s a , v4s b , v4f c )
186
+ {
187
+ * out = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k (a , b , c , 0 , 0 , 0 );
188
+ }
189
+
190
+ // CHECK-GFX90A-LABEL: @test_mfma_f32_32x32x8bf16_1k
191
+ // CHECK-GFX90A: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
192
+ void test_mfma_f32_32x32x8bf16_1k (global v16f * out , v4s a , v4s b , v16f c )
193
+ {
194
+ * out = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k (a , b , c , 0 , 0 , 0 );
195
+ }
196
+
197
+ // CHECK-GFX90A-LABEL: @test_mfma_f32_16x16x16bf16_1k
198
+ // CHECK-GFX90A: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
199
+ void test_mfma_f32_16x16x16bf16_1k (global v4f * out , v4s a , v4s b , v4f c )
200
+ {
201
+ * out = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k (a , b , c , 0 , 0 , 0 );
202
+ }
203
+
204
+ // CHECK-GFX90A-LABEL: @test_mfma_f64_16x16x4f64
205
+ // CHECK-GFX90A: call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %c, i32 0, i32 0, i32 0)
206
+ void test_mfma_f64_16x16x4f64 (global v4d * out , double a , double b , v4d c )
207
+ {
208
+ * out = __builtin_amdgcn_mfma_f64_16x16x4f64 (a , b , c , 0 , 0 , 0 );
209
+ }
210
+
211
+ // CHECK-GFX90A-LABEL: @test_mfma_f64_4x4x4f64
212
+ // CHECK-GFX90A: call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double %c, i32 0, i32 0, i32 0)
213
+ void test_mfma_f64_4x4x4f64 (global double * out , double a , double b , double c )
214
+ {
215
+ * out = __builtin_amdgcn_mfma_f64_4x4x4f64 (a , b , c , 0 , 0 , 0 );
216
+ }
217
+
218
+ #endif // MFMA_GFX90A_TESTS
0 commit comments