-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[mlir][linalg][nfc] Update "pack-dynamic-inner-tile.mlir" #117533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir][linalg][nfc] Update "pack-dynamic-inner-tile.mlir" #117533
Conversation
@llvm/pr-subscribers-mlir-linalg @llvm/pr-subscribers-mlir Author: Andrzej Warzyński (banach-space) Changes
Full diff: https://github.com/llvm/llvm-project/pull/117533.diff 10 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index e3084530bd11b5..dc10f3a1c58ae3 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -52,6 +52,17 @@ def ApplyDecomposeTensorPackUnpackPatternsOp
let assemblyFormat = "attr-dict";
}
+def ApplyDecomposeTensorPadPatternsOp
+ : Op<Transform_Dialect, "apply_patterns.linalg.decompose_pad",
+ [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+ let description = [{
+ Collect patterns to decompose tensor.pad into e.g. tensor::EmptyOp,
+ linalg::FillOp and tensor::InsertSliceOp.
+ }];
+
+ let assemblyFormat = "attr-dict";
+}
+
def ApplyFoldUnitExtentDimsViaReshapesPatternsOp : Op<Transform_Dialect,
"apply_patterns.linalg.fold_unit_extent_dims_via_reshapes",
[DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 51967f83fee377..3c160d55a38e75 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1503,8 +1503,8 @@ using OptimizeCopyFn =
/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and
/// InsertSliceOp. For now, only constant padding values are supported.
-struct GeneralizePadOpPattern : public OpRewritePattern<tensor::PadOp> {
- GeneralizePadOpPattern(MLIRContext *context, PatternBenefit benefit = 1)
+struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> {
+ DecomposePadOpPattern(MLIRContext *context, PatternBenefit benefit = 1)
: OpRewritePattern<tensor::PadOp>(context, benefit) {}
LogicalResult matchAndRewrite(tensor::PadOp padOp,
PatternRewriter &rewriter) const override;
@@ -1688,6 +1688,10 @@ void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
/// outer dims to be unit.
void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns);
+/// Populates patterns to decompose tensor.pad into e.g.
+/// tensor.empty, linalg.fill, tensor.insert_slice.
+void populateDecomposePadPatterns(RewritePatternSet &patterns);
+
/// Populates patterns to transform linalg.conv_2d_xxx operations into
/// linalg.generic (for img2col packing) and linalg.matmul.
/// \see rewriteInIm2Col for more details.
diff --git a/mlir/lib/Conversion/TensorToLinalg/TensorToLinalg.cpp b/mlir/lib/Conversion/TensorToLinalg/TensorToLinalg.cpp
index 5bb79d4bc84e2b..b0ca0ca13d0624 100644
--- a/mlir/lib/Conversion/TensorToLinalg/TensorToLinalg.cpp
+++ b/mlir/lib/Conversion/TensorToLinalg/TensorToLinalg.cpp
@@ -25,5 +25,7 @@ using namespace mlir;
//===----------------------------------------------------------------------===//
void mlir::populateTensorToLinalgPatterns(RewritePatternSet &patterns) {
- patterns.add<mlir::linalg::GeneralizePadOpPattern>(patterns.getContext());
+ // TODO: Add the remaining patterns, e.g. to decompose Pack/Unpack Ops.
+ // Alternatively, delete this file.
+ patterns.add<mlir::linalg::DecomposePadOpPattern>(patterns.getContext());
}
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index ada80deacfdbfe..e08be7d2ebd6ae 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -234,6 +234,11 @@ void transform::ApplyDecomposeTensorPackUnpackPatternsOp::populatePatterns(
linalg::populateDecomposePackUnpackPatterns(patterns);
}
+void transform::ApplyDecomposeTensorPadPatternsOp::populatePatterns(
+ RewritePatternSet &patterns) {
+ linalg::populateDecomposePadPatterns(patterns);
+}
+
void transform::ApplyFoldUnitExtentDimsViaReshapesPatternsOp::populatePatterns(
RewritePatternSet &patterns) {
linalg::ControlDropUnitDims options;
@@ -3491,8 +3496,12 @@ transform::VectorizeChildrenAndApplyPatternsOp::applyToOne(
// Add misc. vectorization patterns (e.g. for tensor.insert_slice)
linalg::populateInsertSliceVectorizationPatterns(patterns);
- if (getVectorizePadding())
+ if (getVectorizePadding()) {
linalg::populatePadOpVectorizationPatterns(patterns);
+ // This creates an alternative path for lowering tensor.pad - by
+ // decomposing it into e.g. linalg.fill.
+ linalg::populateDecomposePadPatterns(patterns);
+ }
vector::populateVectorStepLoweringPatterns(patterns);
TrackingListener listener(state, *this);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index d92543d7264625..c3e176299317ef 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -921,7 +921,7 @@ LogicalResult mlir::linalg::CopyVectorizationPattern::matchAndRewrite(
/// Filling `dest` using FillOp constant padding value if possible.
/// Otherwise, generate a tensor::GenerateOp.
-Value GeneralizePadOpPattern::createFillOrGenerateOp(
+Value DecomposePadOpPattern::createFillOrGenerateOp(
RewriterBase &rewriter, tensor::PadOp padOp, Value dest,
const SmallVector<Value> &dynSizes) const {
auto padValue = padOp.getConstantPaddingValue();
@@ -938,8 +938,8 @@ Value GeneralizePadOpPattern::createFillOrGenerateOp(
}
LogicalResult
-GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp,
- PatternRewriter &rewriter) const {
+DecomposePadOpPattern::matchAndRewrite(tensor::PadOp padOp,
+ PatternRewriter &rewriter) const {
// Given an OpFoldResult, return an index-typed value.
auto getIdxValue = [&](OpFoldResult ofr) {
if (auto val = llvm::dyn_cast_if_present<Value>(ofr))
@@ -1623,3 +1623,7 @@ void linalg::populateDecomposePackUnpackPatterns(RewritePatternSet &patterns) {
// TODO: Add and test patterns for tensor.unpack
patterns.add<DecomposeOuterUnitDimsPackOpPattern>(patterns.getContext());
}
+
+void linalg::populateDecomposePadPatterns(RewritePatternSet &patterns) {
+ patterns.add<DecomposePadOpPattern>(patterns.getContext());
+}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 23b46a2ee55f8d..06bb6c0fb1cac9 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2770,12 +2770,6 @@ void mlir::linalg::populateInsertSliceVectorizationPatterns(
void mlir::linalg::populatePadOpVectorizationPatterns(
RewritePatternSet &patterns, PatternBenefit baseBenefit) {
- // TODO: The following pattern implements "decomposition" and
- // optional "vectorization". Seperate "decomposition" into a sepereate
- // pre-processing pattern group.
- patterns.add<GeneralizePadOpPattern>(patterns.getContext(), baseBenefit);
-
- // Try these specialized patterns first before resorting to the generic one.
patterns.add<PadOpVectorizationWithTransferReadPattern,
PadOpVectorizationWithTransferWritePattern,
PadOpVectorizationWithInsertSlicePattern>(
diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/decompose-pad-tensor.mlir
similarity index 98%
rename from mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
rename to mlir/test/Dialect/Linalg/decompose-pad-tensor.mlir
index 2beab31b613d54..184361dfb30dfd 100644
--- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-pad-tensor.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -split-input-file --test-linalg-transform-patterns="test-generalize-pad-tensor" %s | FileCheck %s
+// RUN: mlir-opt -split-input-file --test-linalg-transform-patterns="test-decompose-pad-tensor" %s | FileCheck %s
// CHECK-LABEL: func @generalize_pad_tensor_static_shape(
// CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
diff --git a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir
index 640de85cc5f12e..41e480648177f5 100644
--- a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir
@@ -202,6 +202,8 @@ module attributes {transform.with_named_sequence} {
%func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
transform.apply_patterns to %func_op {
+ // TODO: Split into two tests, one for each pattern
+ transform.apply_patterns.linalg.decompose_pad
transform.apply_patterns.linalg.pad_vectorization
} : !transform.op<"func.func">
transform.yield
@@ -236,6 +238,8 @@ module attributes {transform.with_named_sequence} {
%func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
transform.apply_patterns to %func_op {
+ // TODO: Split into two tests, one for each pattern
+ transform.apply_patterns.linalg.decompose_pad
transform.apply_patterns.linalg.pad_vectorization
} : !transform.op<"func.func">
transform.yield
@@ -270,6 +274,8 @@ module attributes {transform.with_named_sequence} {
%func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
transform.apply_patterns to %func_op {
+ // TODO: Split into two tests, one for each pattern
+ transform.apply_patterns.linalg.decompose_pad
transform.apply_patterns.linalg.pad_vectorization
} : !transform.op<"func.func">
transform.yield
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
index 32b7247e60d622..0d2fd977c8d557 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
@@ -10,10 +10,6 @@
/// End-to-end test for tensor.pack where one of the inner tile sizes is
/// dynamic.
-///
-/// Note, ATM this is a relatively simple example, with no vectorization and
-/// the dynamic tile size being a compile-time constant. The intention is to
-/// incrementally expand the config to something much more complex.
func.func @main() {
// Allocate and initialise the inputs
@@ -89,26 +85,49 @@ module @transforms attributes { transform.with_named_sequence } {
%tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- // 2. Decompose the tiled Op into (trimmed for brevity):
+ // 2. Decompose the tiled pack Op into (trimmed for brevity):
//
// %padded = tensor.pad %slice_of_A (..) :
// tensor<?x?xi32> to tensor<8x1xi32>
// %inserted_slice = tensor.insert_slice %padded into %slice_of_A_pack (...) :
// tensor<8x1xi32> into tensor<1x1x?x1xi32>
//
- // NOTE: no tile is transposed, hence no linalg.transpose
- %func_1 = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.any_op
- transform.apply_patterns to %func_1 {
+ // (NOTE: no tile is transposed, hence no linalg.transpose)
+ //
+ // This is followed by this decomposition of the pad Op:
+ //
+ // %c123_i32 = arith.constant 123 : i32
+ // %slice_of_A = tensor.extract_slice %A[%3, %arg3] [%4, %5] [1, 1] :
+ // tensor<7x16xi32> to tensor<?x?xi32>
+ // %empty = tensor.empty() : tensor<8x1xi32>
+ // %fill = linalg.fill ins(%c123_i32 : i32) outs(%empty :
+ // tensor<8x1xi32>) -> tensor<8x1xi32>
+ // %inserted_slice = tensor.insert_slice %slice_of_A into %fill[0, 0] [%4, %5] [1, 1] :
+ // tensor<?x?xi32> into tensor<8x1xi32>
+ //
+ %func_op = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.op<"func.func">
+ transform.apply_patterns to %func_op {
transform.apply_patterns.linalg.decompose_pack_unpack
- } : !transform.any_op
+ transform.apply_patterns.linalg.decompose_pad
+ } : !transform.op<"func.func">
+
+ // 3. Vectorize linalg.fill.
+ // Vector sizes match the inner tiles in the payload IR.
+ %fill = transform.structured.match ops{["linalg.fill"]} in %func_op : (!transform.op<"func.func">) -> !transform.any_op
+ transform.structured.vectorize %fill vector_sizes [8, 1] : !transform.any_op
+
+ transform.apply_patterns to %func_op {
+ transform.apply_patterns.tensor.fold_tensor_subset_ops
+ transform.apply_patterns.canonicalization
+ } : !transform.op<"func.func">
// 3. Bufferize before lowering to LLVM
%bufferize = transform.bufferization.one_shot_bufferize %module
{bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
// 4. Canonicalize
- %func_2 = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func">
- transform.apply_patterns to %func_2 {
+ %func_op_bufferized = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func">
+ transform.apply_patterns to %func_op_bufferized {
transform.apply_patterns.canonicalization
} : !transform.op<"func.func">
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index c65e68eaf31f09..25aec75c3c14ad 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -70,8 +70,8 @@ struct TestLinalgTransforms
llvm::cl::desc("Test a set of patterns that rewrite a linalg contraction "
"in vector.contract form"),
llvm::cl::init(false)};
- Option<bool> testGeneralizePadTensor{
- *this, "test-generalize-pad-tensor",
+ Option<bool> testDecomposePadTensor{
+ *this, "test-decompose-pad-tensor",
llvm::cl::desc("Test transform pad tensor by copying with generic ops"),
llvm::cl::init(false)};
Option<bool> testDecomposeTensorPackOp{
@@ -166,9 +166,9 @@ static void applyLinalgToVectorPatterns(func::FuncOp funcOp) {
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
-static void applyGeneralizePadTensorPatterns(func::FuncOp funcOp) {
+static void applyDecomposePadPatterns(func::FuncOp funcOp) {
RewritePatternSet patterns(funcOp.getContext());
- patterns.add<GeneralizePadOpPattern>(funcOp.getContext());
+ patterns.add<DecomposePadOpPattern>(funcOp.getContext());
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
@@ -235,8 +235,8 @@ void TestLinalgTransforms::runOnOperation() {
return applyVectorTransferForwardingPatterns(getOperation());
if (testGenericToVectorPattern)
return applyLinalgToVectorPatterns(getOperation());
- if (testGeneralizePadTensor)
- return applyGeneralizePadTensorPatterns(getOperation());
+ if (testDecomposePadTensor)
+ return applyDecomposePadPatterns(getOperation());
if (testDecomposeTensorPackOp)
return applyDecomposeTensorPackPatterns(getOperation());
if (testDecomposeTensorUnPackOp)
- // NOTE: no tile is transposed, hence no linalg.transpose
- %func_1 = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.any_op
- transform.apply_patterns to %func_1 {
+ // (NOTE: no tile is transposed, hence no linalg.transpose)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it because the pack op is decomposed with rank-reduced slices and the `outer_dims_perm` map is empty/identity? Otherwise, I'd expect a transpose op that transposes the inner dimension of the first dimension into inner tiles.
E.g., it should be `tensor<?x?x16x1>` after expanding the padded tensor, so I'd expect a transpose to bring it to `tensor<?x16x?x1>`.
%A_pack = tensor.pack %A
padding_value(%pad_val : i32)
inner_dims_pos = [0, 1]
inner_tiles = [%tile_size, 1]
into %A_pack_empty : tensor<7x16xi32> -> tensor<?x16x?x1xi32>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this example, note that no dimensions are transposed:
- `inner_dims_pos` is an identity.
- There's no `outer_dims_perm` (so it's also an identity).

Referring to the original `tensor.pack`:
- We start with `tensor<7x16xi32>` and tile:
  - Dimension `7` using `%tile_size` (which is `%c8`).
  - Dimension `16` using `1`.
- This results in `?x1` as the trailing/inner dimensions in the output tensor.
- The remaining dimensions form `?x16` as the outer dimensions in the output tensor:
  - `?` corresponds to the tiling along `7`.
  - `16` comes from the calculation `original_dim / tile_size` = 16 / 1 = 16.
Does this make sense? Let me know if anything needs clarification - I want to ensure I'm explaining this correctly 😅.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I understood this part. There are a few ways to decompose pack ops: we can either drop or not drop unit dims during the decomposition. I think I got the answer from the transform op above. We first tile the outer dims with tile_size=1, so the outer dimensions all have size 1. Then we decompose the ops. In the decomposition, we use patterns that drop outer unit dims, so there are no transpose ops. So I can connect all the pieces now, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I reviewed the last commit and it looks good to me, thanks!
[mlir][linalg][nfc] Update pack-dynamic-inner-tile.mlir Builds on: * llvm#117329: Extract GeneralizePadOpPattern into a standalone transformation. * llvm#116373: Update pack-dynamic-inner-tile.mlir. This update adds vectorization to the "pack-dynamic-inner-tile.mlir" pipeline. The pipeline first decomposes `tensor.pack` into `tensor.pad` and then into `linalg.fill` (llvm#117329). Next, `linalg.fill` is vectorized, with vector sizes matching the inner tile sizes of the original `tensor.pack`. **NOTE:** Depends on llvm#117329 - please only review the top commit!
d0e7294
to
b1aa3ab
Compare
Builds on:
- #117329: Extract `GeneralizePadOpPattern` into a standalone transformation.

This update adds vectorization to the "pack-dynamic-inner-tile.mlir" pipeline.
The pipeline first decomposes `tensor.pack` into `tensor.pad` and then into `linalg.fill` (#117329). Next, `linalg.fill` is vectorized, with vector sizes matching the inner tile sizes of the original `tensor.pack`.