Finish tests

Jerry Wu · Jerry Wu · commit 370ae551f593 · 2024-03-18T22:45:39.000Z
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -559,20 +559,34 @@ projectToInnerMostNonUnitDimsPos(ArrayRef<int64_t> dimsPos,
                                  ArrayRef<int64_t> baseShape) {
   SmallVector<int64_t> projectedDimsPos;
   for (auto pos : dimsPos) {
-    int64_t projectedPos = -1;
+    // In the case all dims are unit, this will return the inner-most one.
+    int64_t projectedPos = reassocIndices[pos].back();
     for (auto it = reassocIndices[pos].rbegin();
          it != reassocIndices[pos].rend(); ++it) {
-      projectedPos = *it;
-      if (baseShape[projectedPos] > 1) {
+      int64_t dim = baseShape[*it];
+      if (dim > 1 || ShapedType::isDynamic(dim)) {
+        projectedPos = *it;
         break;
       }
     }
-    assert(projectedPos != -1 && "projected dim not found");
     projectedDimsPos.push_back(projectedPos);
   }
   return projectedDimsPos;
 }
 
+static bool
+isProjectedDimsDivisibleByTileSizes(ArrayRef<int64_t> projectedDimsPos,
+                                    ArrayRef<int64_t> targetShape,
+                                    ArrayRef<int64_t> tileSizes) {
+  for (auto [projectedPos, tileSize] :
+       llvm::zip_equal(projectedDimsPos, tileSizes)) {
+    int64_t dim = targetShape[projectedPos];
+    if (ShapedType::isDynamic(dim) || (dim % tileSize) != 0)
+      return false;
+  }
+  return true;
+}
+
 static int64_t applyPermutationAndReindexReassoc(
     SmallVector<ReassociationIndices> &reassociationIndices,
     ArrayRef<int64_t> dimsPerm) {
@@ -589,23 +603,24 @@ static int64_t applyPermutationAndReindexReassoc(
 }
 
 /// Bubble up pack op through collapse shape op when the packed dims can be
-/// mapped to the source dims before collapsing. This is possible when the inner
-/// tile sizes can divide the mapped source dims.
+/// projected to the dims before collapsing. This is possible when the inner
+/// tile sizes can divide the projected dims.
 ///
 /// For example:
 ///
-/// %collapsed = tensor.collapse_shape %in [[0, 1], 2] : tensor<?x16x4xf32> into
-/// tensor<?x4xf32> %out = tensor.empty() : tensor<?x4x8x1xf32> %pack =
-/// tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1]
-/// inner_tiles = [8, 1] into %out : tensor<?x4xf32> -> tensor<?x4x8x1xf32>
+/// %collapsed = tensor.collapse_shape %in [[0, 1], 2]
+///     : tensor<?x16x4xf32> into tensor<?x4xf32>
+/// %pack = tensor.pack %collapsed outer_dims_perm = [0, 1]
+///     inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %empty
+///     : tensor<?x4xf32> -> tensor<?x4x8x1xf32>
 ///
 /// Can be transformed into:
 ///
-/// %out = tensor.empty() : tensor<?x2x4x8x1xf32>
-/// %pack = tensor.pack %in outer_dims_perm = [1, 2] inner_dims_pos = [1, 2]
-/// inner_tiles = [8, 1] into %out : tensor<?x16x4xf32> -> tensor<?x2x4x8x1xf32>
-/// %collapsed = tensor.collapse_shape %1 [[0, 1], 2, 3, 4] :
-/// tensor<?x2x4x8x1xf32> into tensor<?x4x8x1>
+/// %pack = tensor.pack %in outer_dims_perm = [1, 2]
+///     inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %empty
+///     : tensor<?x16x4xf32> -> tensor<?x2x4x8x1xf32>
+/// %collapsed = tensor.collapse_shape %pack [[0, 1], 2, 3, 4]
+///     : tensor<?x2x4x8x1xf32> into tensor<?x4x8x1>
 static LogicalResult
 bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp,
                                    tensor::PackOp packOp,
@@ -620,13 +635,9 @@ bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp,
   SmallVector<int64_t> projectedInnerDimsPos =
       projectToInnerMostNonUnitDimsPos(innerDimsPos, reassocIndices, srcShape);
 
-  // Check if the projected dims on the source are divisible by the inner tile
-  // sizes.
-  for (auto [projectedPos, tileSize] :
-       llvm::zip_equal(projectedInnerDimsPos, innerTileSizes)) {
-    int64_t dim = srcShape[projectedPos];
-    if (ShapedType::isDynamic(dim) || (dim % tileSize) != 0)
-      return failure();
+  if (!isProjectedDimsDivisibleByTileSizes(projectedInnerDimsPos, srcShape,
+                                           innerTileSizes)) {
+    return failure();
   }
   // Expand the outer dims permutation with the associated source dims for the
   // new permutation after bubbling. This is because moving a collapsed dim is
@@ -646,7 +657,9 @@ bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp,
       packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPerm);
 
   SmallVector<ReassociationIndices> newReassocIndices = reassocIndices;
-  // First build reassociations on the outer dims after the permutation.
+  // First apply the permutation on the reassociations of the outer dims.
+  // For example given the permutation [1, 0], the reassociations: [[0, 1], [2]]
+  // -> [[0], [1, 2]]
   int64_t lastPos =
       applyPermutationAndReindexReassoc(newReassocIndices, outerDimsPerm);
   // Then add direct mapping for the inner tile dims.
@@ -698,6 +711,25 @@ class BubbleUpPackOpThroughReshapeOp final
   ControlPropagationFn controlFn;
 };
 
+/// Push down unpack op through expand shape op when the packed dims can be
+/// projected to the dims after expanding. This is possible when the inner tile
+/// sizes can divide the projected dims.
+///
+/// For example:
+///
+/// %unpack = tensor.unpack %in outer_dims_perm = [0, 1]
+///     inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %empty
+///     : tensor<?x32x8x8xf32> -> tensor<?x256xf32>
+/// %expanded = tensor.expand_shape %unpack [[0, 1], [2]]
+///     : tensor<?x256xf32> into tensor<?x256x256xf32>
+///
+/// Can be transformed into:
+///
+/// %expanded = tensor.expand_shape %ain [[0, 1], [2], [3], [4]]
+///     : tensor<?x32x8x8xf32> into tensor<?x32x32x8x8xf32>
+/// %unpack = tensor.unpack %expanded outer_dims_perm = [0, 1, 2]
+///     inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %empty
+///     : tensor<?x32x32x8x8xf32> -> tensor<?x256x256xf32>
 static LogicalResult
 pushDownUnPackOpThroughExpandShape(tensor::UnPackOp unPackOp,
                                    tensor::ExpandShapeOp expandOp,
@@ -712,15 +744,9 @@ pushDownUnPackOpThroughExpandShape(tensor::UnPackOp unPackOp,
   SmallVector<int64_t> projectedInnerDimsPos =
       projectToInnerMostNonUnitDimsPos(innerDimsPos, reassocIndices, dstShape);
 
-  // Check if the projected dims on the dest are divisible by the inner tile
-  // sizes.
-  for (auto [projectedPos, tileSize] :
-       llvm::zip_equal(projectedInnerDimsPos, innerTileSizes)) {
-    int64_t dim = dstShape[projectedPos];
-    if (ShapedType::isDynamic(dim) ||
-        (dstShape[projectedPos] % tileSize) != 0) {
-      return failure();
-    }
+  if (!isProjectedDimsDivisibleByTileSizes(projectedInnerDimsPos, dstShape,
+                                           innerTileSizes)) {
+    return failure();
   }
   // Expand the outer dims permutation with the associated expanded dims for the
   // new permutation after pushing. This is because moving a source dim is
@@ -733,7 +759,9 @@ pushDownUnPackOpThroughExpandShape(tensor::UnPackOp unPackOp,
   }
 
   SmallVector<ReassociationIndices> newReassocIndices = reassocIndices;
-  // First build reassociations on the outer dims after the permutation.
+  // First apply the permutation on the reassociations of the outer dims.
+  // For example given the permutation [1, 0], the reassociations: [[0, 1], [2]]
+  // -> [[0], [1, 2]]
   int64_t lastPos =
       applyPermutationAndReindexReassoc(newReassocIndices, outerDimsPerm);
   // Then add direct mapping for the inner tile dims.
diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir
@@ -906,58 +906,126 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32
 // CHECK-SAME:      into %[[UNPACK_NEW_DEST]]
 // CHECK:         return %[[UNPACK]] : tensor<16x540x960xi32>
 
-func.func @bubble_up_pack_through_collapse(%1: tensor<192x16x64x4xf32>) -> tensor<384x256x8x1xf32> {
-  %collapsed = tensor.collapse_shape %1 [[0, 1], [2, 3]] : tensor<192x16x64x4xf32> into tensor<3072x256xf32>
-  %2 = tensor.empty() : tensor<384x256x8x1xf32>
-  %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<3072x256xf32> -> tensor<384x256x8x1xf32>
-  func.return %pack : tensor<384x256x8x1xf32>
+// -----
+
+func.func @bubble_up_pack_through_collapse(%1: tensor<?x16x4xf32>, %dim : index) -> tensor<?x4x8x1xf32> {
+  %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor<?x16x4xf32> into tensor<?x4xf32>
+  %2 = tensor.empty(%dim) : tensor<?x4x8x1xf32>
+  %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<?x4xf32> -> tensor<?x4x8x1xf32>
+  func.return %pack : tensor<?x4x8x1xf32>
 }
+// CHECK-LABEL: func.func @bubble_up_pack_through_collapse
+// CHECK-SAME:      %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK-SAME:      %[[ARG1:[a-zA-Z0-9]+]]
+// CHECK:         %[[C0:.+]] = arith.constant 0 : index
+// CHECK:         %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x16x4xf32>
+// CHECK:         %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<?x2x4x8x1xf32>
+// CHECK:         %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<?x16x4xf32> -> tensor<?x2x4x8x1xf32>
+// CHECK:         %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor<?x2x4x8x1xf32> into tensor<?x4x8x1xf32>
+// CHECK:         return %[[COLLAPSED]] : tensor<?x4x8x1xf32>
+
+// -----
 
 func.func @bubble_up_permuted_pack_through_collapse(%1: tensor<4x192x16x256xf32>) -> tensor<4x32x3072x8x1xf32> {
   %collapsed = tensor.collapse_shape %1 [[0], [1, 2], [3]] : tensor<4x192x16x256xf32> into tensor<4x3072x256xf32>
   %2 = tensor.empty() : tensor<4x32x3072x8x1xf32>
   %pack = tensor.pack %collapsed outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %2 : tensor<4x3072x256xf32> -> tensor<4x32x3072x8x1xf32>
   func.return %pack : tensor<4x32x3072x8x1xf32>
 }
+// CHECK-LABEL: func.func @bubble_up_permuted_pack_through_collapse
+// CHECK-SAME:      %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK:         %[[EMPTY:.+]] = tensor.empty() : tensor<4x32x192x16x8x1xf32>
+// CHECK:         %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<4x192x16x256xf32> -> tensor<4x32x192x16x8x1xf32>
+// CHECK:         %[[COLLAPSED:.+]] = tensor.collapse_shape %pack {{\[}}[0], [1], [2, 3], [4], [5]] : tensor<4x32x192x16x8x1xf32> into tensor<4x32x3072x8x1xf32>
+// CHECK:         return %[[COLLAPSED]] : tensor<4x32x3072x8x1xf32>
+
+// -----
 
 func.func @bubble_up_pack_through_unit_collapse(%1: tensor<1x64x1x4xf32>) -> tensor<8x4x8x1xf32> {
   %collapsed = tensor.collapse_shape %1 [[0, 1, 2], [3]] : tensor<1x64x1x4xf32> into tensor<64x4xf32>
   %2 = tensor.empty() : tensor<8x4x8x1xf32>
   %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<64x4xf32> -> tensor<8x4x8x1xf32>
   func.return %pack : tensor<8x4x8x1xf32>
 }
+// CHECK-LABEL: func.func @bubble_up_pack_through_unit_collapse
+// CHECK-SAME:      %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK:         %[[EMPTY:.+]] = tensor.empty() : tensor<1x8x1x4x8x1xf32>
+// CHECK:         %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<1x64x1x4xf32> -> tensor<1x8x1x4x8x1xf32>
+// CHECK:         %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1, 2], [3], [4], [5]] : tensor<1x8x1x4x8x1xf32> into tensor<8x4x8x1xf32>
+// CHECK:         return %[[COLLAPSED]] : tensor<8x4x8x1xf32>
+
+// -----
 
 func.func @no_bubble_up_pack_through_non_divisible_collapse(%1: tensor<3072x64x4xf32>) -> tensor<384x32x8x8xf32> {
   %collapsed = tensor.collapse_shape %1 [[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32>
   %2 = tensor.empty() : tensor<384x32x8x8xf32>
   %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %2 : tensor<3072x256xf32> -> tensor<384x32x8x8xf32>
   func.return %pack : tensor<384x32x8x8xf32>
 }
+// CHECK-LABEL: func.func @no_bubble_up_pack_through_non_divisible_collapse
+// CHECK-SAME:      %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK:         %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32>
+// CHECK:         %[[PACK:.+]] = tensor.pack %[[COLLAPSED]]
+// CHECK:         return %[[PACK]] : tensor<384x32x8x8xf32>
 
-func.func @push_down_unpack_through_expand(%5: tensor<384x32x8x8xf32>) -> tensor<12x256x256xf32> {
-  %6 = tensor.empty() : tensor<3072x256xf32>
-  %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32>
-  %expanded = tensor.expand_shape %unpack [[0, 1], [2]] : tensor<3072x256xf32> into tensor<12x256x256xf32>
-  func.return %expanded : tensor<12x256x256xf32>
+// -----
+
+func.func @push_down_unpack_through_expand(%5: tensor<?x32x8x8xf32>, %dim: index) -> tensor<?x256x256xf32> {
+  %6 = tensor.empty(%dim) : tensor<?x256xf32>
+  %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<?x32x8x8xf32> -> tensor<?x256xf32>
+  %expanded = tensor.expand_shape %unpack [[0, 1], [2]] : tensor<?x256xf32> into tensor<?x256x256xf32>
+  func.return %expanded : tensor<?x256x256xf32>
 }
+// CHECK-LABEL: func.func @push_down_unpack_through_expand
+// CHECK-SAME:      %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK-SAME:      %[[ARG1:[a-zA-Z0-9]+]]
+// CHECK:         %[[C0:.+]] = arith.constant 0 : index
+// CHECK:         %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3], [4]] : tensor<?x32x8x8xf32> into tensor<?x32x32x8x8xf32>
+// CHECK:         %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor<?x32x32x8x8xf32>
+// CHECK:         %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor<?x256x256xf32>
+// CHECK:         %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<?x32x32x8x8xf32> -> tensor<?x256x256xf32>
+// CHECK:         return %[[UNPACK]] : tensor<?x256x256xf32>
+
+// -----
 
 func.func @push_down_permuted_unpack_through_expand(%5: tensor<4x32x384x8x8xf32>) -> tensor<4x12x256x256xf32> {
   %6 = tensor.empty() : tensor<4x3072x256xf32>
   %unpack = tensor.unpack %5 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 8] into %6 : tensor<4x32x384x8x8xf32> -> tensor<4x3072x256xf32>
   %expanded = tensor.expand_shape %unpack [[0], [1, 2], [3]] : tensor<4x3072x256xf32> into tensor<4x12x256x256xf32>
   func.return %expanded : tensor<4x12x256x256xf32>
 }
+// CHECK-LABEL: @push_down_permuted_unpack_through_expand
+// CHECK-SAME:      %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK:         %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3], [4], [5]] : tensor<4x32x384x8x8xf32> into tensor<4x32x12x32x8x8xf32>
+// CHECK:         %[[EMPTY:.+]] = tensor.empty() : tensor<4x12x256x256xf32>
+// CHECK:         %[[UNPACL:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<4x32x12x32x8x8xf32> -> tensor<4x12x256x256xf32>
+// CHECK:         return %[[UNPACK]] : tensor<4x12x256x256xf32>
+
+// -----
 
 func.func @push_down_unpack_through_unit_expand(%5: tensor<6x32x8x8xf32>) -> tensor<3x16x1x256xf32> {
   %6 = tensor.empty() : tensor<48x256xf32>
   %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<6x32x8x8xf32> -> tensor<48x256xf32>
   %expanded = tensor.expand_shape %unpack [[0, 1, 2], [3]] : tensor<48x256xf32> into tensor<3x16x1x256xf32>
   func.return %expanded : tensor<3x16x1x256xf32>
 }
+// CHECK-LABEL: func.func @push_down_unpack_through_unit_expand
+// CHECK-SAME:      %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK:         %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1, 2], [3], [4], [5]] : tensor<6x32x8x8xf32> into tensor<3x2x1x32x8x8xf32>
+// CHECK:         %[[EMPTY:.+]] = tensor.empty() : tensor<3x16x1x256xf32>
+// CHECK:         %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<3x2x1x32x8x8xf32> -> tensor<3x16x1x256xf32>
+// CHECK:         return %[[UNPACK]] : tensor<3x16x1x256xf32>
+
+// -----
 
 func.func @no_push_down_unpack_through_non_divisible_expand(%5: tensor<384x32x8x8xf32>) -> tensor<256x12x256xf32> {
   %6 = tensor.empty() : tensor<3072x256xf32>
   %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32>
   %expanded = tensor.expand_shape %unpack [[0, 1], [2]] : tensor<3072x256xf32> into tensor<256x12x256xf32>
   func.return %expanded : tensor<256x12x256xf32>
 }
+// CHECK-LABEL: func.func @no_push_down_unpack_through_non_divisible_expand
+// CHECK-SAME:      %[[ARG0:[a-zA-Z0-9]+]]
+// CHECK:         %[[UNPACK:.+]] = tensor.unpack %[[ARG0]]
+// CHECK:         %[[EXPANDED:.+]] = tensor.expand_shape %[[UNPACK]] {{\[}}[0, 1], [2]] : tensor<3072x256xf32> into tensor<256x12x256xf32>
+// CHECK:         return %[[EXPANDED]] : tensor<256x12x256xf32>