[mlir][linalg] Add tests for tensor.unpack decomposition #118786
Conversation
@llvm/pr-subscribers-mlir-linalg @llvm/pr-subscribers-mlir

Author: Andrzej Warzyński (banach-space)

Changes

This commit adds additional tests and documentation for `DecomposeOuterUnitDimsUnPackOpPattern` to ensure symmetry with its counterpart for `tensor.pack`, `DecomposeOuterUnitDimsPackOpPattern`.

The new tests aim to improve implementation, documentation, and test coverage for `tensor.unpack`. They cover the following scenarios:

* Static tile sizes: a simple `tensor.unpack` case (`@simple_unpack_static_tiles`).
* Dynamic tile size: `tensor.unpack` with a single dynamic tile size (`@simple_unpack_dynamic_tile`).
* Transpose: `tensor.unpack` with a dynamic tile size and a transpose (`@simple_unpack_dynamic_tile_transpose`), currently commented out due to some missing logic (see below).
* Scalable tile size: `tensor.unpack` with a scalable inner tile size (`@simple_unpack_scalable_tile`).

Notes: the test `@simple_unpack_dynamic_tile_transpose` is commented out because the logic for capturing dynamic sizes for `tensor::EmptyOp` when some tile sizes are dynamic is incomplete. This missing functionality will be addressed in a follow-up patch.

Full diff: https://github.com/llvm/llvm-project/pull/118786.diff

3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 3c160d55a38e75..84c13ad2b363ce 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1519,7 +1519,7 @@ struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> {
/// * tensor::PadOp + linalg::TransposeOp + tensor::EmptyOp +
/// tensor::InsertSliceOp ops.
///
-/// Required that all the outer dims of the input tensor::PackOp are 1.
+/// Requires that all the outer dims of the input tensor::PackOp are 1.
///
/// Before:
/// ```
@@ -1555,9 +1555,32 @@ struct DecomposeOuterUnitDimsPackOpPattern
PatternRewriter &rewriter) const override;
};
-/// Rewrites a tensor::UnPackOp into a sequence of rank-reduced extract_slice op
-/// + transpose op + insert_slice op, where the tensor::UnPackOp has outer dims
-/// being all 1s.
+/// Rewrites a tensor::UnPackOp into a sequence of rank-reduced
+/// * tensor::ExtractSliceOp + linalg::TransposeOp + tensor::InsertSliceOp
+///
+/// Requires that all the outer dims of the input tensor::UnPackOp are 1.
+///
+/// Before:
+/// ```
+/// %unpacked = tensor.unpack %input
+/// inner_dims_pos = [1, 0]
+/// inner_tiles = [2, 8]
+/// into %output : tensor<1x1x2x8xf32> -> tensor<5x1xf32>
+/// ```
+///
+/// After:
+/// ```
+/// // Rank-reduced extract to obtain the tile
+/// %slice = tensor.extract_slice %input[0, 0, 0, 0] [1, 1, 2, 8] [1, 1, 1, 1] : tensor<1x1x2x8xf32> to tensor<2x8xf32>
+/// // EmptyOp + TransposeOp
+/// %init = tensor.empty() : tensor<8x2xf32>
+/// %transposed = linalg.transpose
+/// ins(%slice : tensor<2x8xf32>)
+/// outs(%init : tensor<8x2xf32>) permutation = [1, 0]
+/// // Extract a slice matching the specified output size
+/// %result = tensor.extract_slice %transposed[0, 0] [5, 1] [1, 1] : tensor<8x2xf32> to tensor<5x1xf32>
+///
+/// ```
struct DecomposeOuterUnitDimsUnPackOpPattern
: public OpRewritePattern<tensor::UnPackOp> {
using OpRewritePattern<tensor::UnPackOp>::OpRewritePattern;
diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir
index 4f986606ef93ad..1cc1484ed40951 100644
--- a/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir
@@ -67,6 +67,9 @@ func.func @simple_pad_and_pack_dynamic_tile(%input: tensor<5x1xf32>, %output: te
// CHECK: %[[RES:.*]] = tensor.insert_slice %[[PAD:.*]] into %[[DEST]][0, 0, 0, 0] [1, 1, %[[TILE_DIM_0]], 2] [1, 1, 1, 1] : tensor<?x2xf32> into tensor<1x1x?x2xf32>
// CHECK: return %[[RES]] : tensor<1x1x?x2xf32>
+/// Same as example above, but the dynamic tile size is a compile-time constant
+/// that's folded away.
+
func.func @simple_pad_and_pack_dynamic_tile_cst(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32) -> tensor<1x1x?x2xf32> {
%tile_dim_0 = arith.constant 8 : index
%0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32>
diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir
index 8b15873473a972..8d36242112a1b1 100644
--- a/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir
@@ -19,11 +19,11 @@ func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor<
// -----
-func.func @simple_unpack_and_extract_slice(%input: tensor<1x1x8x2xf32>, %output: tensor<5x1xf32>) -> tensor<5x1xf32> {
+func.func @simple_unpack_static_tiles(%input: tensor<1x1x8x2xf32>, %output: tensor<5x1xf32>) -> tensor<5x1xf32> {
%0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32>
return %0 : tensor<5x1xf32>
}
-// CHECK-LABEL: func.func @simple_unpack_and_extract_slice
+// CHECK-LABEL: func.func @simple_unpack_static_tiles
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC]][0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1]
@@ -33,6 +33,55 @@ func.func @simple_unpack_and_extract_slice(%input: tensor<1x1x8x2xf32>, %output:
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[TILE]][0, 0] [5, 1] [1, 1]
// CHECK: return %[[SLICE]]
+/// Same as example above, but with 1 dynamic tile size.
+
+func.func @simple_unpack_dynamic_tile(%input: tensor<1x1x?x2xf32>, %output: tensor<5x1xf32>, %tile_dim_0: index) -> tensor<5x1xf32> {
+ %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32>
+ return %0 : tensor<5x1xf32>
+}
+// CHECK-LABEL: func.func @simple_unpack_dynamic_tile
+// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
+// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
+// CHECK-SAME: %[[TILE_DIM_1:[a-zA-Z0-9]+]]
+// CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC]][0, 0, 0, 0] [1, 1, %[[TILE_DIM_1]], 2] [1, 1, 1, 1]
+// CHECK-NOT: linalg.transpose
+// They have the same type, so the insert_slice op is folded
+// away.
+// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[TILE]][0, 0] [5, 1] [1, 1]
+// CHECK: return %[[SLICE]]
+
+/// Same as example above, but with 1 dynamic tile size and a transpose.
+
+/// FIXME: This is currently broken:
+/// * 'tensor.empty' op incorrect number of dynamic sizes, has 0, expected 1
+
+//func.func @simple_unpack_dynamic_tile_transpose(%input: tensor<1x1x2x?xf32>, %output: tensor<5x1xf32>, %tile_dim_0: index) -> tensor<5x1xf32> {
+// %0 = tensor.unpack %input inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim_0] into %output : tensor<1x1x2x?xf32> -> tensor<5x1xf32>
+// return %0 : tensor<5x1xf32>
+//}
+
+/// Same as example above, but with 1 scalable tile size.
+
+func.func @simple_unpack_scalable_tile(%input: tensor<1x1x?x2xf32>, %output: tensor<5x1xf32>) -> tensor<5x1xf32> {
+ %c8 = arith.constant 8 : index
+ %vscale = vector.vscale
+ %c8_vscale = arith.muli %vscale, %c8 : index
+ %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32>
+ return %0 : tensor<5x1xf32>
+}
+// CHECK-LABEL: func.func @simple_unpack_scalable_tile
+// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
+// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[VS:.+]] = vector.vscale
+// CHECK: %[[C8_VS:.+]] = arith.muli %[[VS]], %[[C8]] : index
+// CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC]][0, 0, 0, 0] [1, 1, %[[C8_VS]], 2] [1, 1, 1, 1]
+// CHECK-NOT: linalg.transpose
+// They have the same type, so the insert_slice op is folded
+// away.
+// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[TILE]][0, 0] [5, 1] [1, 1]
+// CHECK: return %[[SLICE]]
+
// -----
func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32>) -> tensor<32x8xf32>{
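For readers who want to try the pattern documented above outside the in-tree tests, the following is a minimal sketch, not part of this PR, of how `DecomposeOuterUnitDimsUnPackOpPattern` could be driven from a custom pass with the greedy rewrite driver; the helper function name is hypothetical.

```c++
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

// Hypothetical helper: collect the pattern documented in Transforms.h above
// and apply it to a fixed point on everything nested under `root`.
static mlir::LogicalResult decomposeOuterUnitDimUnPackOps(mlir::Operation *root) {
  mlir::RewritePatternSet patterns(root->getContext());
  patterns.add<mlir::linalg::DecomposeOuterUnitDimsUnPackOpPattern>(
      root->getContext());
  return mlir::applyPatternsAndFoldGreedily(root, std::move(patterns));
}
```

In-tree, the same pattern is exercised through `mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir`, shown in the diff above.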
✅ With the latest revision this PR passed the C/C++ code formatter.
Thanks for the improvement!
Sounds reasonable. Thanks for the additional comments and tests!
This PR fixes an issue identified in llvm#118786, where the following example triggers a verification error:

```mlir
func.func @foo(
    %src: tensor<1x1x2x?xf32>,
    %dest: tensor<5x1xf32>,
    %tile_dim: index) -> tensor<5x1xf32> {
  %0 = tensor.unpack %src
    inner_dims_pos = [1, 0]
    inner_tiles = [2, %tile_dim]
    into %dest : tensor<1x1x2x?xf32> -> tensor<5x1xf32>
  return %0 : tensor<5x1xf32>
}
```

The error occurs because of faulty logic when computing dynamic sizes for `tensor::EmptyOp`, which initializes tensors for `linalg::transpose`. This specific example fails due to:

* Dynamic inner tile size, combined with
* Non-identity permutation.

For reference, here's the verification error:

```bash
error: 'tensor.empty' op incorrect number of dynamic sizes, has 0, expected 1
```

and here's the problematic `tensor.empty` (printed in generic form):

```mlir
%1 = "tensor.empty"() : () -> tensor<?x2xf32>
```

**Fix**

This PR refactors how dynamic sizes for `tensor::EmptyOp` are computed. Instead of generating a separate vector of dynamic sizes to complement the output shape, this PR adopts a common MLIR idiom: passing a vector of `OpFoldResult`s directly to the `EmptyOp` builder. Previously, only dynamic sizes corresponding to outer dimensions were tracked (see `dynamicSizes`), while inner dimensions were skipped, causing issues in certain cases.

**Refactoring changes**

Variable names have been updated for better readability:

* `readShape` → `readShapeForExtractSlice`
* `readSizes` → `extractSliceSizes`
* `readStrides` → `stridesForExtractSlice`

Additional comments have been added for clarity.

**Remaining inconsistencies**

Patterns for `tensor::PackOp` and `tensor::UnpackOp` remain partially inconsistent: `DecomposeOuterUnitDimsPackOpPattern` enforces that all outer dimensions must be unit-sized, while `DecomposeOuterUnitDimsUnPackOpPattern` does not. The two implementations have diverged in logic. I plan to address these inconsistencies in a follow-up PR to further unify these patterns.
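To illustrate the idiom described above, here is a minimal sketch, not the actual patch, of building `tensor::EmptyOp` from a vector of `OpFoldResult` sizes so that static and dynamic tile sizes are handled uniformly; the helper name and its parameters are assumptions for illustration.

```c++
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/PatternMatch.h"

// Hypothetical helper: create the init tensor for linalg.transpose from a
// mixed static/dynamic shape. Static entries in `sizes` are attributes and
// end up in the result type; dynamic entries are SSA values and become
// operands of tensor.empty, so no separate "dynamic sizes" vector has to be
// kept in sync with the output shape.
static mlir::Value createTransposeInit(mlir::PatternRewriter &rewriter,
                                       mlir::Location loc,
                                       llvm::ArrayRef<mlir::OpFoldResult> sizes,
                                       mlir::Type elementType) {
  return rewriter.create<mlir::tensor::EmptyOp>(loc, sizes, elementType);
}
```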