@@ -39,27 +39,15 @@ int main() {
39
39
joint_matrix<int8_t , matrix_use::b, 16 , 16 , matrix_layout::row_major>
40
40
sub_b;
41
41
42
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
43
- // @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.s32.p1i32(i32
44
- // addrspace(1)* %add.ptr.i, i32 16) #{{.*}}
42
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.s32.p1i32(i32 addrspace(1)* %_arg_, i32 16) #{{.*}}
45
43
joint_matrix_load (sg, sub_c, accC.get_pointer (), stride);
46
- // CHECK: tail call { i32, i32 }
47
- // @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.s8.p0i32(i32*
48
- // %call.ascast.i.i49.i, i32 16) #{{.*}}
44
+ // CHECK: tail call { i32, i32 } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.s8.p0i32(i32* %call.ascast.i.i52.i, i32 16) #{{.*}}
49
45
joint_matrix_load (sg, sub_a, accA.get_pointer (), stride);
50
- // CHECK: tail call { i32, i32 }
51
- // @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.s8.p0i32(i32*
52
- // %call.ascast.i.i.i, i32 16) #{{.*}}
46
+ // CHECK: tail call { i32, i32 } @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.s8.p0i32(i32* %call.ascast.i.i.i, i32 16) #{{.*}}
53
47
joint_matrix_load (sg, sub_b, accB.get_pointer (), stride);
54
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
55
- // @llvm.nvvm.wmma.m16n16k16.mma.row.row.s8(i32 %19, i32 %20, i32 %23,
56
- // i32 %24, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32
57
- // %15, i32 %16) #{{.*}}
48
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k16.mma.row.row.s8(i32 %11, i32 %12, i32 %15, i32 %16, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) #{{.*}}
58
49
sub_c = joint_matrix_mad (sg, sub_a, sub_b, sub_c);
59
- // CHECK: tail call void
60
- // @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.s32.p1i32(i32
61
- // addrspace(1)* %add.ptr.i76, i32 %26, i32 %27, i32 %28, i32 %29, i32
62
- // %30, i32 %31, i32 %32, i32 %33, i32 16) #{{.*}}
50
+ // CHECK: tail call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.s32.p1i32(i32 addrspace(1)* %_arg_14, i32 %18, i32 %19, i32 %20, i32 %21, i32 %22, i32 %23, i32 %24, i32 %25, i32 16) #{{.*}}
63
51
joint_matrix_store (sg, sub_c, accD.get_pointer (), stride);
64
52
});
65
53
@@ -78,27 +66,15 @@ int main() {
78
66
joint_matrix<int8_t , matrix_use::b, 16 , 16 , matrix_layout::col_major>
79
67
sub_b;
80
68
81
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
82
- // @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.s32.p1i32(i32
83
- // addrspace(1)* %add.ptr.i, i32 16) #{{.*}}
69
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.s32.p1i32(i32 addrspace(1)* %_arg_, i32 16) #{{.*}}
84
70
joint_matrix_load (sg, sub_c, accC.get_pointer (), stride);
85
- // CHECK: tail call { i32, i32 }
86
- // @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.s8.p0i32(i32*
87
- // %call.ascast.i.i49.i, i32 16) #{{.*}}
71
+ // CHECK: tail call { i32, i32 } @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.s8.p0i32(i32* %call.ascast.i.i52.i, i32 16) #{{.*}}
88
72
joint_matrix_load (sg, sub_a, accA.get_pointer (), stride);
89
- // CHECK: tail call { i32, i32 }
90
- // @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.s8.p0i32(i32*
91
- // %call.ascast.i.i.i, i32 16) #{{.*}}
73
+ // CHECK: tail call { i32, i32 } @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.s8.p0i32(i32* %call.ascast.i.i.i, i32 16) #{{.*}}
92
74
joint_matrix_load (sg, sub_b, accB.get_pointer (), stride);
93
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
94
- // @llvm.nvvm.wmma.m16n16k16.mma.col.col.s8(i32 %19, i32 %20, i32 %23,
95
- // i32 %24, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32
96
- // %15, i32 %16) #{{.*}}
75
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k16.mma.col.col.s8(i32 %11, i32 %12, i32 %15, i32 %16, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) #{{.*}}
97
76
sub_c = joint_matrix_mad (sg, sub_a, sub_b, sub_c);
98
- // CHECK: tail call void
99
- // @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.s32.p1i32(i32
100
- // addrspace(1)* %add.ptr.i76, i32 %26, i32 %27, i32 %28, i32 %29, i32
101
- // %30, i32 %31, i32 %32, i32 %33, i32 16) #{{.*}}
77
+ // CHECK: tail call void @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.s32.p1i32(i32 addrspace(1)* %_arg_14, i32 %18, i32 %19, i32 %20, i32 %21, i32 %22, i32 %23, i32 %24, i32 %25, i32 16) #{{.*}}
102
78
joint_matrix_store (sg, sub_c, accD.get_pointer (), stride);
103
79
});
104
80
@@ -117,27 +93,15 @@ int main() {
117
93
joint_matrix<int8_t , matrix_use::b, 16 , 8 , matrix_layout::row_major>
118
94
sub_b;
119
95
120
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
121
- // @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.s32.p1i32(i32
122
- // addrspace(1)* %add.ptr.i, i32 16) #{{.*}}
96
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.s32.p1i32(i32 addrspace(1)* %_arg_, i32 16) #{{.*}}
123
97
joint_matrix_load (sg, sub_c, accC.get_pointer (), stride);
124
- // CHECK: tail call { i32, i32, i32, i32 }
125
- // @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.s8.p0i32(i32*
126
- // %call.ascast.i.i46.i, i32 16) #{{.*}}
98
+ // CHECK: tail call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.s8.p0i32(i32* %call.ascast.i.i49.i, i32 16) #{{.*}}
127
99
joint_matrix_load (sg, sub_a, accA.get_pointer (), stride);
128
- // CHECK: tail call i32
129
- // @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.s8.p0i32(i32*
130
- // %call.ascast.i.i.i, i32 16) #{{.*}}
100
+ // CHECK: tail call i32 @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.s8.p0i32(i32* %call.ascast.i.i.i, i32 16) #{{.*}}
131
101
joint_matrix_load (sg, sub_b, accB.get_pointer (), stride);
132
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
133
- // @llvm.nvvm.wmma.m32n8k16.mma.row.row.s8(i32 %19, i32 %20, i32 %21,
134
- // i32 %22, i32 %24, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32
135
- // %14, i32 %15, i32 %16) #{{.*}}
102
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m32n8k16.mma.row.row.s8(i32 %11, i32 %12, i32 %13, i32 %14, i32 %16, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) #{{.*}}
136
103
sub_c = joint_matrix_mad (sg, sub_a, sub_b, sub_c);
137
- // CHECK: tail call void
138
- // @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.s32.p1i32(i32
139
- // addrspace(1)* %add.ptr.i76, i32 %26, i32 %27, i32 %28, i32 %29, i32
140
- // %30, i32 %31, i32 %32, i32 %33, i32 16) #{{.*}}
104
+ // CHECK: tail call void @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.s32.p1i32(i32 addrspace(1)* %_arg_14, i32 %18, i32 %19, i32 %20, i32 %21, i32 %22, i32 %23, i32 %24, i32 %25, i32 16) #{{.*}}
141
105
joint_matrix_store (sg, sub_c, accD.get_pointer (), stride);
142
106
});
143
107
@@ -156,27 +120,15 @@ int main() {
156
120
joint_matrix<int8_t , matrix_use::b, 16 , 8 , matrix_layout::col_major>
157
121
sub_b;
158
122
159
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
160
- // @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.s32.p1i32(i32
161
- // addrspace(1)* %add.ptr.i, i32 16) #{{.*}}
123
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.s32.p1i32(i32 addrspace(1)* %_arg_, i32 16) #{{.*}}
162
124
joint_matrix_load (sg, sub_c, accC.get_pointer (), stride);
163
- // CHECK: tail call { i32, i32, i32, i32 }
164
- // @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.s8.p0i32(i32*
165
- // %call.ascast.i.i46.i, i32 16) #{{.*}}
125
+ // CHECK: tail call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.s8.p0i32(i32* %call.ascast.i.i49.i, i32 16) #{{.*}}
166
126
joint_matrix_load (sg, sub_a, accA.get_pointer (), stride);
167
- // CHECK: tail call i32
168
- // @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.s8.p0i32(i32*
169
- // %call.ascast.i.i.i, i32 16) #{{.*}}
127
+ // CHECK: tail call i32 @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.s8.p0i32(i32* %call.ascast.i.i.i, i32 16) #{{.*}}
170
128
joint_matrix_load (sg, sub_b, accB.get_pointer (), stride);
171
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
172
- // @llvm.nvvm.wmma.m32n8k16.mma.col.col.s8(i32 %19, i32 %20, i32 %21,
173
- // i32 %22, i32 %24, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32
174
- // %14, i32 %15, i32 %16) #{{.*}}
129
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m32n8k16.mma.col.col.s8(i32 %11, i32 %12, i32 %13, i32 %14, i32 %16, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) #{{.*}}
175
130
sub_c = joint_matrix_mad (sg, sub_a, sub_b, sub_c);
176
- // CHECK: tail call void
177
- // @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.s32.p1i32(i32
178
- // addrspace(1)* %add.ptr.i76, i32 %26, i32 %27, i32 %28, i32 %29, i32
179
- // %30, i32 %31, i32 %32, i32 %33, i32 16) #{{.*}}
131
+ // CHECK: tail call void @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.s32.p1i32(i32 addrspace(1)* %_arg_14, i32 %18, i32 %19, i32 %20, i32 %21, i32 %22, i32 %23, i32 %24, i32 %25, i32 16) #{{.*}}
180
132
joint_matrix_store (sg, sub_c, accD.get_pointer (), stride);
181
133
});
182
134
@@ -195,27 +147,15 @@ int main() {
195
147
joint_matrix<int8_t , matrix_use::b, 16 , 32 , matrix_layout::row_major>
196
148
sub_b;
197
149
198
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
199
- // @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.s32.p1i32(i32
200
- // addrspace(1)* %add.ptr.i, i32 16) #{{.*}}
150
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.s32.p1i32(i32 addrspace(1)* %_arg_, i32 16) #{{.*}}
201
151
joint_matrix_load (sg, sub_c, accC.get_pointer (), stride);
202
- // CHECK: tail call i32
203
- // @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.s8.p0i32(i32*
204
- // %call.ascast.i.i46.i, i32 16) #{{.*}}
152
+ // CHECK: tail call i32 @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.s8.p0i32(i32* %call.ascast.i.i49.i, i32 16) #{{.*}}
205
153
joint_matrix_load (sg, sub_a, accA.get_pointer (), stride);
206
- // CHECK: tail call { i32, i32, i32, i32 }
207
- // @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.s8.p0i32(i32*
208
- // %call.ascast.i.i.i, i32 16) #{{.*}}
154
+ // CHECK: tail call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.s8.p0i32(i32* %call.ascast.i.i.i, i32 16) #{{.*}}
209
155
joint_matrix_load (sg, sub_b, accB.get_pointer (), stride);
210
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
211
- // @llvm.nvvm.wmma.m8n32k16.mma.row.row.s8(i32 %18, i32 %21, i32 %22,
212
- // i32 %23, i32 %24, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32
213
- // %14, i32 %15, i32 %16) #{{.*}}
156
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m8n32k16.mma.row.row.s8(i32 %10, i32 %13, i32 %14, i32 %15, i32 %16, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) #{{.*}}
214
157
sub_c = joint_matrix_mad (sg, sub_a, sub_b, sub_c);
215
- // CHECK: tail call void
216
- // @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.s32.p1i32(i32
217
- // addrspace(1)* %add.ptr.i76, i32 %26, i32 %27, i32 %28, i32 %29, i32
218
- // %30, i32 %31, i32 %32, i32 %33, i32 16) #{{.*}}
158
+ // CHECK: tail call void @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.s32.p1i32(i32 addrspace(1)* %_arg_14, i32 %18, i32 %19, i32 %20, i32 %21, i32 %22, i32 %23, i32 %24, i32 %25, i32 16) #{{.*}}
219
159
joint_matrix_store (sg, sub_c, accD.get_pointer (), stride);
220
160
});
221
161
@@ -234,27 +174,15 @@ int main() {
234
174
joint_matrix<int8_t , matrix_use::b, 16 , 32 , matrix_layout::col_major>
235
175
sub_b;
236
176
237
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
238
- // @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.s32.p1i32(i32
239
- // addrspace(1)* %add.ptr.i, i32 16) #{{.*}}
177
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.s32.p1i32(i32 addrspace(1)* %_arg_, i32 16) #{{.*}}
240
178
joint_matrix_load (sg, sub_c, accC.get_pointer (), stride);
241
- // CHECK: tail call i32
242
- // @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.s8.p0i32(i32*
243
- // %call.ascast.i.i46.i, i32 16) #{{.*}}
179
+ // CHECK: tail call i32 @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.s8.p0i32(i32* %call.ascast.i.i49.i, i32 16) #{{.*}}
244
180
joint_matrix_load (sg, sub_a, accA.get_pointer (), stride);
245
- // CHECK: tail call { i32, i32, i32, i32 }
246
- // @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.s8.p0i32(i32*
247
- // %call.ascast.i.i.i, i32 16) #{{.*}}
181
+ // CHECK: tail call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.s8.p0i32(i32* %call.ascast.i.i.i, i32 16) #{{.*}}
248
182
joint_matrix_load (sg, sub_b, accB.get_pointer (), stride);
249
- // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 }
250
- // @llvm.nvvm.wmma.m8n32k16.mma.col.col.s8(i32 %18, i32 %21, i32 %22,
251
- // i32 %23, i32 %24, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32
252
- // %14, i32 %15, i32 %16) #{{.*}}
183
+ // CHECK: tail call { i32, i32, i32, i32, i32, i32, i32, i32 } @llvm.nvvm.wmma.m8n32k16.mma.col.col.s8(i32 %10, i32 %13, i32 %14, i32 %15, i32 %16, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) #{{.*}}
253
184
sub_c = joint_matrix_mad (sg, sub_a, sub_b, sub_c);
254
- // CHECK: tail call void
255
- // @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.s32.p1i32(i32
256
- // addrspace(1)* %add.ptr.i76, i32 %26, i32 %27, i32 %28, i32 %29, i32
257
- // %30, i32 %31, i32 %32, i32 %33, i32 16) #{{.*}}
185
+ // CHECK: tail call void @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.s32.p1i32(i32 addrspace(1)* %_arg_14, i32 %18, i32 %19, i32 %20, i32 %21, i32 %22, i32 %23, i32 %24, i32 %25, i32 16) #{{.*}}
258
186
joint_matrix_store (sg, sub_c, accD.get_pointer (), stride);
259
187
});
260
188
});
0 commit comments