@@ -229,15 +229,15 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
229
229
// final `vector.transfer_write` should be preserved as:
230
230
// vector.transfer_write %2, %subview
231
231
232
- // CHECK-LABEL: func.func @collapse_shape
232
+ // CHECK-LABEL: func.func @collapse_shape_and_read_from_source
233
233
// CHECK: scf.for {{.*}} {
234
234
// CHECK: vector.transfer_read
235
235
// CHECK: vector.transfer_write
236
236
// CHECK: vector.transfer_write
237
237
// CHECK: vector.transfer_read
238
238
// CHECK: vector.transfer_write
239
239
240
- func.func @collapse_shape (%in_0: memref <1 x20 x1 xi32 >, %vec: vector <4 xi32 >) {
240
+ func.func @collapse_shape_and_read_from_source (%in_0: memref <1 x20 x1 xi32 >, %vec: vector <4 xi32 >) {
241
241
%c0_i32 = arith.constant 0 : i32
242
242
%c0 = arith.constant 0 : index
243
243
%c4 = arith.constant 4 : index
@@ -257,6 +257,98 @@ func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
257
257
return
258
258
}
259
259
260
+ // The same regression test as @collapse_shape_and_read_from_source, but for expand_shape.
261
+
262
+ // CHECK-LABEL: func.func @expand_shape_and_read_from_source
263
+ // CHECK: scf.for {{.*}} {
264
+ // CHECK: vector.transfer_read
265
+ // CHECK: vector.transfer_write
266
+ // CHECK: vector.transfer_write
267
+ // CHECK: vector.transfer_read
268
+ // CHECK: vector.transfer_write
269
+
270
+ // Stores %1 to %alloca, overwrites it through the aliasing %expand_shape view, then
+ // reads %alloca back. The expected output above keeps both writes and the read.
+ func.func @expand_shape_and_read_from_source (%in_0: memref <20 xi32 >, %vec: vector <1 x4 x1 xi32 >) {
271
+ %c0_i32 = arith.constant 0 : i32
272
+ %c0 = arith.constant 0 : index
273
+ %c4 = arith.constant 4 : index
274
+ %c20 = arith.constant 20 : index
275
+
276
+ %alloca = memref.alloca () {alignment = 64 : i64 } : memref <4 xi32 >
277
+ %expand_shape = memref.expand_shape %alloca [[0 , 1 , 2 ]] output_shape [1 , 4 , 1 ] : memref <4 xi32 > into memref <1 x4 x1 xi32 >
278
+ scf.for %arg0 = %c0 to %c20 step %c4 {
279
+ %subview = memref.subview %in_0 [%arg0 ] [4 ] [1 ] : memref <20 xi32 > to memref <4 xi32 , strided <[1 ], offset : ?>>
280
+ %1 = vector.transfer_read %subview [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <4 xi32 , strided <[1 ], offset : ?>>, vector <4 xi32 >
281
+ // %alloca and %expand_shape alias, so the next two writes target the same memory.
282
+ vector.transfer_write %1 , %alloca [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 >
283
+ vector.transfer_write %vec , %expand_shape [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x4 x1 xi32 >, memref <1 x4 x1 xi32 >
+ // Read back through the source memref; %1 must not be forwarded here.
284
+ %2 = vector.transfer_read %alloca [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <4 xi32 >, vector <4 xi32 >
285
+ vector.transfer_write %2 , %subview [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 , strided <[1 ], offset : ?>>
286
+ }
287
+ return
288
+ }
289
+
290
+ // The same regression test, but the initial write is to the collapsed memref,
291
+ // and the subsequent unforwardable read is also from the collapsed memref.
292
+
293
+ // CHECK-LABEL: func.func @collapse_shape_and_read_from_collapse
294
+ // CHECK: scf.for {{.*}} {
295
+ // CHECK: vector.transfer_read
296
+ // CHECK: vector.transfer_write
297
+ // CHECK: vector.transfer_write
298
+ // CHECK: vector.transfer_read
299
+ // CHECK: vector.transfer_write
300
+
301
+ // Stores %1 through %collapse_shape, overwrites it via the aliasing %alloca, then reads
+ // %collapse_shape back. The expected output above keeps both writes and the read.
+ func.func @collapse_shape_and_read_from_collapse (%in_0: memref <20 xi32 >, %vec: vector <1 x4 x1 xi32 >) {
302
+ %c0_i32 = arith.constant 0 : i32
303
+ %c0 = arith.constant 0 : index
304
+ %c4 = arith.constant 4 : index
305
+ %c20 = arith.constant 20 : index
306
+
307
+ %alloca = memref.alloca () {alignment = 64 : i64 } : memref <1 x4 x1 xi32 >
308
+ %collapse_shape = memref.collapse_shape %alloca [[0 , 1 , 2 ]] : memref <1 x4 x1 xi32 > into memref <4 xi32 >
309
+ scf.for %arg0 = %c0 to %c20 step %c4 {
310
+ %subview = memref.subview %in_0 [%arg0 ] [4 ] [1 ] : memref <20 xi32 > to memref <4 xi32 , strided <[1 ], offset : ?>>
311
+ %1 = vector.transfer_read %subview [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <4 xi32 , strided <[1 ], offset : ?>>, vector <4 xi32 >
312
+ vector.transfer_write %1 , %collapse_shape [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 >
313
+ // %alloca and %collapse_shape alias, so this write targets the same memory as the previous one.
314
+ vector.transfer_write %vec , %alloca [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x4 x1 xi32 >, memref <1 x4 x1 xi32 >
+ // Read back through the collapsed view; %1 must not be forwarded here.
315
+ %2 = vector.transfer_read %collapse_shape [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <4 xi32 >, vector <4 xi32 >
316
+ vector.transfer_write %2 , %subview [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 , strided <[1 ], offset : ?>>
317
+ }
318
+ return
319
+ }
320
+
321
+ // The same test, except the initial write is to the expanded memref and the
322
+ // unforwardable read is from it too (the previous collapse test, but for expand).
323
+
324
+ // CHECK-LABEL: func.func @expand_shape_and_read_from_expand
325
+ // CHECK: scf.for {{.*}} {
326
+ // CHECK: vector.transfer_read
327
+ // CHECK: vector.transfer_write
328
+ // CHECK: vector.transfer_write
329
+ // CHECK: vector.transfer_read
330
+ // CHECK: vector.transfer_write
331
+
332
+ // Stores %1 through %expand_shape, overwrites it via the aliasing %alloca, then reads
+ // %expand_shape back. The expected output above keeps both writes and the read.
+ func.func @expand_shape_and_read_from_expand (%in_0: memref <1 x20 x1 xi32 >, %vec: vector <4 xi32 >) {
333
+ %c0_i32 = arith.constant 0 : i32
334
+ %c0 = arith.constant 0 : index
335
+ %c4 = arith.constant 4 : index
336
+ %c20 = arith.constant 20 : index
337
+
338
+ %alloca = memref.alloca () {alignment = 64 : i64 } : memref <4 xi32 >
339
+ %expand_shape = memref.expand_shape %alloca [[0 , 1 , 2 ]] output_shape [1 , 4 , 1 ] : memref <4 xi32 > into memref <1 x4 x1 xi32 >
340
+ scf.for %arg0 = %c0 to %c20 step %c4 {
341
+ %subview = memref.subview %in_0 [0 , %arg0 , 0 ] [1 , 4 , 1 ] [1 , 1 , 1 ] : memref <1 x20 x1 xi32 > to memref <1 x4 x1 xi32 , strided <[20 , 1 , 1 ], offset : ?>>
342
+ %1 = vector.transfer_read %subview [%c0 , %c0 , %c0 ], %c0_i32 {in_bounds = [true , true , true ]} : memref <1 x4 x1 xi32 , strided <[20 , 1 , 1 ], offset : ?>>, vector <1 x4 x1 xi32 >
343
+ vector.transfer_write %1 , %expand_shape [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x4 x1 xi32 >, memref <1 x4 x1 xi32 >
344
+ // %alloca and %expand_shape alias, so this write targets the same memory as the previous one.
345
+ vector.transfer_write %vec , %alloca [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 >
+ // Read back through the expanded view; %1 must not be forwarded here.
346
+ %2 = vector.transfer_read %expand_shape [%c0 , %c0 , %c0 ], %c0_i32 {in_bounds = [true , true , true ]} : memref <1 x4 x1 xi32 >, vector <1 x4 x1 xi32 >
347
+ vector.transfer_write %2 , %subview [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x4 x1 xi32 >, memref <1 x4 x1 xi32 , strided <[20 , 1 , 1 ], offset : ?>>
348
+ }
349
+ return
350
+ }
351
+
260
352
// CHECK-LABEL: func @forward_dead_store_dynamic_same_index
261
353
// CHECK-NOT: vector.transfer_write
262
354
// CHECK-NOT: vector.transfer_read
0 commit comments