@@ -195,53 +195,89 @@ func.func @f3ext(%a: vector<5xi8>) -> vector<8xi17> {
195
195
196
196
// Test case: sign-extension of vector<8xi4> to vector<8xi32> via arith.extsi.
// The added (+) expectations pin the exact rewritten sequence: the argument is
// bitcast to vector<4xi8>; one value is formed by shifting left and then
// arithmetic-shifting right by the dense<4> constant (captured as LOW), another
// by arithmetic-shifting right directly (captured as HIGH); the two are combined
// with vector.interleave, and the vector<8xi8> result is extended to i32.
// The removed (-) expectations only matched op names and expected
// vector.shuffle where vector.interleave is now expected.
// NOTE(review): the MLIR types in the function body are spelled with extra
// spaces (e.g. "vector <8 xi4 >"); this looks like an extraction artifact of
// the diff rendering - confirm against the real test file.
// CHECK-LABEL: func.func @aligned_extsi(
197
197
func.func @aligned_extsi (%a: vector <8 xi4 >) -> vector <8 xi32 > {
198
- // CHECK: arith.shli
199
- // CHECK: arith.shrsi
200
- // CHECK: arith.shrsi
201
- // CHECK: vector.shuffle
202
- // CHECK: arith.extsi %{{.*}} : vector<8xi8> to vector<8xi32>
198
+ // CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xi32> {
199
+ // CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8>
200
+ // CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8>
201
+ // CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
202
+ // CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8>
203
+ // CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
204
+ // CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8>
205
+ // CHECK: %[[I32:.*]] = arith.extsi %[[INTERLEAVE]] : vector<8xi8> to vector<8xi32>
203
206
%0 = arith.extsi %a : vector <8 xi4 > to vector <8 xi32 >
204
207
return %0 : vector <8 xi32 >
205
208
}
206
209
210
// Test case (newly added in this hunk): 2-D variant of the i4 sign-extension
// test, extending vector<8x32xi4> to vector<8x32xi32>.
// The expectations mirror the 1-D sequence with the trailing dimension halved
// for the i8 view: bitcast to vector<8x16xi8>, shift by the dense<4> constant
// to form LOW and HIGH, vector.interleave back to vector<8x32xi8>, then
// arith.extsi to i32.
+ // CHECK-LABEL: func.func @aligned_extsi_2d(
211
+ func.func @aligned_extsi_2d (%a: vector <8 x32 xi4 >) -> vector <8 x32 xi32 > {
212
+ // CHECK-SAME: %[[IN:.*]]: vector<8x32xi4>) -> vector<8x32xi32> {
213
+ // CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<8x16xi8>
214
+ // CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8x32xi4> to vector<8x16xi8>
215
+ // CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8>
216
+ // CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<8x16xi8>
217
+ // CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8>
218
+ // CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<8x16xi8>
219
+ // CHECK: %[[I32:.*]] = arith.extsi %[[INTERLEAVE]] : vector<8x32xi8> to vector<8x32xi32>
220
+ %0 = arith.extsi %a : vector <8 x32 xi4 > to vector <8 x32 xi32 >
221
+ return %0 : vector <8 x32 xi32 >
222
+ }
223
+
207
224
// Test case: sign-extension of vector<8xi4> to vector<8xi8>, i.e. the result
// element type is already i8.
// The added (+) expectations stop at vector.interleave (bitcast to
// vector<4xi8>, shifts by the dense<4> constant, interleave of LOW and HIGH);
// no trailing arith.extsi is expected here.
// NOTE(review): the removed (-) lines included a negative expectation that no
// arith.extsi appears in the output; the added lines do not carry an
// equivalent forward, so a stray extsi after the interleave would no longer be
// caught - confirm whether dropping it was intentional.
// CHECK-LABEL: func.func @aligned_extsi_base_case(
208
225
func.func @aligned_extsi_base_case (%a: vector <8 xi4 >) -> vector <8 xi8 > {
209
- // CHECK: arith.shli
210
- // CHECK: arith.shrsi
211
- // CHECK: arith.shrsi
212
- // CHECK: vector.shuffle
213
- // CHECK-NOT: arith.extsi
226
+ // CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xi8> {
227
+ // CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8>
228
+ // CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8>
229
+ // CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
230
+ // CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8>
231
+ // CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
232
+ // CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8>
214
233
%0 = arith.extsi %a : vector <8 xi4 > to vector <8 xi8 >
215
234
return %0 : vector <8 xi8 >
216
235
}
217
236
218
237
// Test case: signed int-to-float conversion of vector<8xi4> to vector<8xf32>
// via arith.sitofp.
// The added (+) expectations require the same i4 -> i8 sequence as the extsi
// tests (bitcast to vector<4xi8>, shifts by the dense<4> constant producing LOW
// and HIGH, vector.interleave), followed by arith.sitofp on the vector<8xi8>
// result.
// The removed (-) expectations matched op names only and expected a shuffle
// where vector.interleave is now expected.
// CHECK-LABEL: func.func @aligned_sitofp(
219
238
func.func @aligned_sitofp (%a: vector <8 xi4 >) -> vector <8 xf32 > {
220
- // CHECK: arith.shli
221
- // CHECK: arith.shrsi
222
- // CHECK: arith.shrsi
223
- // CHECK: shuffle
224
- // CHECK: arith.sitofp %{{.*}} : vector<8xi8> to vector<8xf32>
239
+ // CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xf32> {
240
+ // CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8>
241
+ // CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8>
242
+ // CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
243
+ // CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8>
244
+ // CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
245
+ // CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8>
246
+ // CHECK: %[[F32:.*]] = arith.sitofp %[[INTERLEAVE]] : vector<8xi8> to vector<8xf32>
225
247
%0 = arith.sitofp %a : vector <8 xi4 > to vector <8 xf32 >
226
248
return %0 : vector <8 xf32 >
227
249
}
228
250
251
// Test case (newly added in this hunk): 2-D variant of the i4 sitofp test,
// converting vector<8x32xi4> to vector<8x32xf32>.
// The expectations mirror the 1-D sequence with the trailing dimension halved
// for the i8 view: bitcast to vector<8x16xi8>, shifts by the dense<4> constant
// to form LOW and HIGH, vector.interleave back to vector<8x32xi8>, then
// arith.sitofp to f32.
+ // CHECK-LABEL: func.func @aligned_sitofp_2d(
252
+ func.func @aligned_sitofp_2d (%a: vector <8 x32 xi4 >) -> vector <8 x32 xf32 > {
253
+ // CHECK-SAME: %[[IN:.*]]: vector<8x32xi4>) -> vector<8x32xf32> {
254
+ // CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<8x16xi8>
255
+ // CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8x32xi4> to vector<8x16xi8>
256
+ // CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8>
257
+ // CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<8x16xi8>
258
+ // CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8>
259
+ // CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<8x16xi8>
260
+ // CHECK: %[[F32:.*]] = arith.sitofp %[[INTERLEAVE]] : vector<8x32xi8> to vector<8x32xf32>
261
+ %0 = arith.sitofp %a : vector <8 x32 xi4 > to vector <8 x32 xf32 >
262
+ return %0 : vector <8 x32 xf32 >
263
+ }
264
+
229
265
// Test case: vector.transpose of vector<8x16xi4> to vector<16x8xi4>.
// Expected output: a value produced by vector.interleave (captured as EXT) is
// transposed as vector<8x16xi8> -> vector<16x8xi8>, and the result is truncated
// back to i4 with arith.trunci.
// The removed (-) expectations instead required EXT to come from a direct
// arith.extsi of the argument from i4 to i8.
// NOTE(review): the argument capture IN introduced by the added lines is not
// referenced by any later expectation in this function, and the
// vector.interleave match pins neither operands nor types - consider whether a
// tighter match is wanted.
// CHECK-LABEL: func.func @i4_transpose(
230
- // CHECK-SAME: %[[A:[0-9a-z]*]]
231
266
func.func @i4_transpose (%a: vector <8 x16 xi4 >) -> vector <16 x8 xi4 > {
232
- // CHECK: %[[EXT:.*]] = arith.extsi %[[A]] : vector<8x16xi4> to vector<8x16xi8>
233
- // CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8>
234
- // CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi4>
267
+ // CHECK-SAME: %[[IN:.*]]: vector<8x16xi4>) -> vector<16x8xi4> {
268
+ // CHECK: %[[EXT:.*]] = vector.interleave
269
+ // CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8>
270
+ // CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi4>
235
271
%0 = vector.transpose %a , [1 , 0 ] : vector <8 x16 xi4 > to vector <16 x8 xi4 >
236
272
return %0 : vector <16 x8 xi4 >
237
273
}
238
274
239
275
// Test case: vector.transpose of vector<8x16xi7> to vector<16x8xi7>.
// Expected output (same ops before and after this hunk): the argument is
// extended with arith.extsi from i7 to i8, transposed as
// vector<8x16xi8> -> vector<16x8xi8>, and truncated back to i7 with
// arith.trunci.
// The only change in this hunk is how the argument is captured: the loose
// capture named A that preceded the func line is removed, and a capture named
// IN that matches the full signature is added after the func line.
// CHECK-LABEL: func.func @i7_transpose(
240
- // CHECK-SAME: %[[A:[0-9a-z]*]]
241
276
func.func @i7_transpose (%a: vector <8 x16 xi7 >) -> vector <16 x8 xi7 > {
242
- // CHECK: %[[EXT:.*]] = arith.extsi %[[A]] : vector<8x16xi7> to vector<8x16xi8>
243
- // CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8>
244
- // CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi7>
277
+ // CHECK-SAME: %[[IN:.*]]: vector<8x16xi7>) -> vector<16x8xi7> {
278
+ // CHECK: %[[EXT:.*]] = arith.extsi %[[IN]] : vector<8x16xi7> to vector<8x16xi8>
279
+ // CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8>
280
+ // CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi7>
245
281
%0 = vector.transpose %a , [1 , 0 ] : vector <8 x16 xi7 > to vector <16 x8 xi7 >
246
282
return %0 : vector <16 x8 xi7 >
247
283
}
0 commit comments