@@ -435,6 +435,40 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten
435
435
436
436
// -----
437
437
438
+ #map0 = affine_map<(d0, d1) -> (d0, d1)>  // 2-D identity map, used for both the input and the init operand.
439
+ func.func @elem_pack_transpose_outer_dims_unused_init(%arg0: tensor<128x256xi32>, %init: tensor<128x256xi32>) -> tensor<16x4x32x16xi32>{  // Elementwise generic feeding a pack with transposed outer dims.
440
+   %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]}
441
+       ins(%arg0 : tensor<128x256xi32>)
442
+       outs(%init : tensor<128x256xi32>) {
443
+     ^bb0(%arg3: i32, %arg4: i32):  // %arg4 (the element of %init) is never read by the payload below.
444
+       %4 = arith.addi %arg3, %arg3 : i32
445
+       linalg.yield %4 : i32
446
+   } -> tensor<128x256xi32>
447
+   %empty = tensor.empty() : tensor<16x4x32x16xi32>
448
+   %pack = linalg.pack %elem
449
+     outer_dims_perm = [1, 0]
450
+     inner_dims_pos = [0, 1]
451
+     inner_tiles = [32, 16]
452
+     into %empty : tensor<128x256xi32> -> tensor<16x4x32x16xi32>
453
+   return %pack : tensor<16x4x32x16xi32>
454
+ }
455
+
456
+ // CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
457
+ // CHECK-LABEL: func.func @elem_pack_transpose_outer_dims_unused_init
458
+ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
459
+ // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
460
+ // CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32>
461
+ // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32>
462
+ // CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]]
463
+ // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16]
464
+ // CHECK-SAME: into %[[ARG0_EMPTY]]
465
+ // CHECK: %[[RES:.+]] = linalg.generic
466
+ // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]]
467
+ // CHECK-SAME: ins(%[[PACKED_ARG0]]
468
+ // CHECK-SAME: outs(%[[ARG1_EMPTY]]
469
+
470
+ // -----
471
+
438
472
#map = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
439
473
440
474
func.func @unpack_on_output (%arg0: tensor <12 x2 x56 x56 x32 xf32 >) -> tensor <12 x56 x56 x64 xf32 > {
@@ -497,7 +531,7 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56
497
531
498
532
#map = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
499
533
500
- func.func @unpack_element_type_change (%arg0: tensor <12 x2 x56 x56 x32 xf32 >, %init: tensor <12 x56 x56 x64 xf16 >) -> tensor <12 x56 x56 x64 xf16 > {
534
+ func.func @unpack_element_type_change_no_use (%arg0: tensor <12 x2 x56 x56 x32 xf32 >, %init: tensor <12 x56 x56 x64 xf16 >) -> tensor <12 x56 x56 x64 xf16 > {
501
535
%0 = tensor.empty () : tensor <12 x56 x56 x64 xf32 >
502
536
%1 = linalg.unpack %arg0 outer_dims_perm = [0 , 3 , 1 , 2 ] inner_dims_pos = [3 ] inner_tiles = [32 ] into %0 : tensor <12 x2 x56 x56 x32 xf32 > -> tensor <12 x56 x56 x64 xf32 >
503
537
%2 = linalg.generic {index ing_maps = [#map , #map ], iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ]} ins (%1: tensor <12 x56 x56 x64 xf32 >) outs (%init : tensor <12 x56 x56 x64 xf16 >) {
@@ -509,17 +543,14 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t
509
543
}
510
544
511
545
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
512
- // CHECK-LABEL: func.func @unpack_element_type_change
546
+ // CHECK-LABEL: func.func @unpack_element_type_change_no_use
513
547
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
514
548
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
515
- // CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16>
516
- // CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]]
517
- // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
518
- // CHECK-SAME: into %[[ARG1_PACK_EMPTY]]
549
+ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16>
519
550
// CHECK: %[[RES:.+]] = linalg.generic
520
551
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]]
521
552
// CHECK-SAME: ins(%[[ARG0]]
522
- // CHECK-SAME: outs(%[[ARG1_PACK]]
553
+ // CHECK-SAME: outs(%[[EMPTY]]
523
554
// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]]
524
555
// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
525
556
// CHECK-SAME: into %[[ARG1]]
@@ -1402,13 +1433,10 @@ func.func @push_unpack_in_padded_domain_foldable(%arg0: tensor<8x8x4x8xf32>, %de
1402
1433
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
1403
1434
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
1404
1435
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
1405
- // CHECK: %[[ARG2_PACK_EMPTY:.+]] = tensor.empty
1406
- // CHECK: %[[ARG2_PACK:.+]] = linalg.pack %[[ARG2]]
1407
- // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 8]
1408
- // CHECK-SAME: into %[[ARG2_PACK_EMPTY]]
1436
+ // CHECK: %[[EMPTY:.+]] = tensor.empty
1409
1437
// CHECK: %[[GENERIC:.+]] = linalg.generic
1410
1438
// CHECK-SAME: ins(%[[ARG0]] : tensor<8x8x4x8xf32>)
1411
- // CHECK-SAME: outs(%[[ARG2_PACK]] : tensor<?x8x4x8xbf16>)
1439
+ // CHECK-SAME: outs(%[[EMPTY]] : tensor<?x8x4x8xbf16>)
1412
1440
// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[GENERIC]]
1413
1441
// CHECK-SAME: into %[[ARG2]]
1414
1442
// CHECK: return %[[UNPACK]] : tensor<?x64xbf16>
0 commit comments