[MLIR][Transform] Hoist Pad generates linalg.transpose #109669
@llvm/pr-subscribers-mlir-linalg @llvm/pr-subscribers-mlir

Author: Hugo Trachino (nujaa)

Changes: For readability purposes, generate linalg named ops when possible (an IR-level before/after sketch follows the diff). Similarly, makeMemRefCopyOp, defined under makeTransposeOp, is dead code, and linalg.copy should be preferred.

Full diff: https://github.com/llvm/llvm-project/pull/109669.diff

8 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 0208f854f799ec..48e657cca96e39 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -549,7 +549,7 @@ namespace detail {
struct PackingResult {
SmallVector<OpFoldResult> offsets, sizes, strides;
SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;
- GenericOp maybeTransposeOp;
+ TransposeOp maybeTransposeOp;
tensor::PadOp hoistedPadOp;
};
@@ -568,9 +568,9 @@ buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist,
/// a larger tensor. On success, `opToHoist` is replaced by the cloned version
/// in the packing loop so the caller can continue reasoning about the padding
/// operation. If `transposeVector` is non-empty, hoist padding introduces a
-/// GenericOp to transpose the padded tensor before inserting it into the packed
-/// tensor. A `transposeVector` can change the storage order of the padded
-/// tensor but does not change the order of the pack or compute loops.
+/// TransposeOp to transpose the padded tensor before inserting it into the
+/// packed tensor. A `transposeVector` can change the storage order of the
+/// padded tensor but does not change the order of the pack or compute loops.
///
/// TODO: In the future, we should consider rewriting as a tensor.pack after
/// hoisting since this abstraction is now available.
@@ -615,13 +615,13 @@ FailureOr<Value>
hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist,
int64_t numLoops, ArrayRef<int64_t> transposeVector,
tensor::PadOp &hoistedOp,
- SmallVectorImpl<GenericOp> &transposeOps);
+ SmallVectorImpl<TransposeOp> &transposeOps);
/// Calls into `hoistPaddingOnTensors` with a local IRRewriter.
FailureOr<Value>
hoistPaddingOnTensors(tensor::PadOp opToHoist, int64_t numLoops,
ArrayRef<int64_t> transposeVector,
tensor::PadOp &hoistedOp,
- SmallVectorImpl<GenericOp> &transposeOps);
+ SmallVectorImpl<TransposeOp> &transposeOps);
/// Apply padding and hoisting to `linalgOp` according to the configuration
/// specified in `options`.
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index f1df49ce3eaa36..1e4f3004dec7e7 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -75,12 +75,6 @@ bool isReductionIterator(utils::IteratorType iteratorType);
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
Value source, Value pad, bool nofold);
-/// Returns a GenericOp that transposes `inputTensor` into `outputTensor`
-/// using `transposeVector` to permute the `inputTensor` dimensions.
-GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
- Value outputTensor,
- ArrayRef<int64_t> transposeVector);
-
/// Returns GenericOp that copies an n-D memref. Unlike the current
/// implementation of memref::CopyOp, this op can further tile, lower to loops
/// or vectorize.
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 29b5631f61b488..c28b07f33f5dcb 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -2000,7 +2000,7 @@ transform::HoistPadOp::applyToOne(transform::TransformRewriter &rewriter,
transform::ApplyToEachResultList &results,
transform::TransformState &state) {
tensor::PadOp hoistedPadOp;
- SmallVector<GenericOp> transposeOps;
+ SmallVector<TransposeOp> transposeOps;
FailureOr<Value> result =
hoistPaddingOnTensors(rewriter, target, getNumLoops(), getTranspose(),
hoistedPadOp, transposeOps);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index c3a08ce86082a8..d33a17af63459e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -633,15 +633,15 @@ static FailureOr<PackingResult> buildPackingLoopNestImpl(
rewriter.getIndexAttr(1));
// Step 3. Optionally transpose the padded tensor.
- GenericOp maybeTransposeOp;
+ TransposeOp maybeTransposeOp;
Value paddedTensor = bvm.lookup(opToHoist.getResult());
if (!transposeVector.empty()) {
Value outputTensor = rewriter.create<tensor::ExtractSliceOp>(
loc, transposedTensorType, hoistedPackedTensor, offsets, sizes,
strides);
- maybeTransposeOp = makeTransposeOp(rewriter, loc, paddedTensor,
- outputTensor, transposeVector);
- paddedTensor = maybeTransposeOp.getResult(0);
+ maybeTransposeOp = rewriter.create<linalg::TransposeOp>(
+ loc, paddedTensor, outputTensor, transposeVector);
+ paddedTensor = maybeTransposeOp.getResult()[0];
}
// Innermost tensor.insert_slice and yields are optional / need loops.
@@ -938,7 +938,7 @@ static Value replaceByPackingResult(RewriterBase &rewriter,
FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops,
ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
- SmallVectorImpl<GenericOp> &transposeOps) {
+ SmallVectorImpl<TransposeOp> &transposeOps) {
LLVM_DEBUG(DBGS() << "\n"; DBGS() << " Try to hoist " << *(opToHoist) << "\n";
DBGS() << " by " << numLoops << " loops\n");
@@ -980,9 +980,9 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
// Transpose the packed tensor back to the original storage order.
Value emptyTensor = rewriter.create<tensor::EmptyOp>(
loc, paddedTensorType.getShape(), paddedTensorType.getElementType());
- GenericOp unTransposeOp =
- makeTransposeOp(rewriter, loc, newResult, emptyTensor, transposeVector);
- newResult = unTransposeOp.getResult(0);
+ TransposeOp unTransposeOp = rewriter.create<linalg::TransposeOp>(
+ loc, newResult, emptyTensor, transposeVector);
+ newResult = unTransposeOp.getResult()[0];
transposeOps.push_back(unTransposeOp);
}
@@ -999,11 +999,10 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
return newResult;
}
-FailureOr<Value>
-mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist, int64_t numLoops,
- ArrayRef<int64_t> transposeVector,
- tensor::PadOp &hoistedOp,
- SmallVectorImpl<GenericOp> &transposeOps) {
+FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
+ tensor::PadOp opToHoist, int64_t numLoops,
+ ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
+ SmallVectorImpl<TransposeOp> &transposeOps) {
IRRewriter rewriter(opToHoist.getContext());
return hoistPaddingOnTensors(rewriter, opToHoist, numLoops, transposeVector,
hoistedOp, transposeOps);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp b/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp
index 518d2e138c02a9..a066c44408915e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp
@@ -299,7 +299,7 @@ mlir::linalg::padAndHoistLinalgOp(RewriterBase &rewriter, LinalgOp linalgOp,
}
tensor::PadOp hoistedOp;
- SmallVector<GenericOp> transposeOps;
+ SmallVector<TransposeOp> transposeOps;
SmallVector<int64_t> transposeVector =
en.index() < options.transposePaddings.size()
? options.transposePaddings[en.index()]
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 6a3f2fc5fbc496..38e427af1c4846 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -249,41 +249,6 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
return sliceOp.getSource();
}
-GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
- Value outputTensor,
- ArrayRef<int64_t> transposeVector) {
- auto resultTensorType = cast<RankedTensorType>(outputTensor.getType());
- Type elementType = resultTensorType.getElementType();
-
- assert(isPermutationVector(transposeVector) &&
- "expect transpose vector to be a permutation");
- assert(transposeVector.size() ==
- static_cast<size_t>(resultTensorType.getRank()) &&
- "expect transpose vector size to match result tensor rank");
-
- // Compute the transpose and the indentity indexing maps.
- SmallVector<AffineMap> indexingMaps = {
- inversePermutation(AffineMap::getPermutationMap(
- SmallVector<unsigned>(transposeVector), b.getContext())),
- AffineMap::getMultiDimIdentityMap(transposeVector.size(),
- b.getContext())};
- SmallVector<utils::IteratorType> iteratorTypes(transposeVector.size(),
- utils::IteratorType::parallel);
-
- // Create a GenericOp to transpose `inputTensor` into `outputTensor`.
- auto transposeOp =
- b.create<GenericOp>(loc, resultTensorType, inputTensor, outputTensor,
- indexingMaps, iteratorTypes);
-
- // Create the body of the transpose operation.
- OpBuilder::InsertionGuard g(b);
- Region &body = transposeOp.getRegion();
- Block *bodyBlock = b.createBlock(&body, /*insertPt=*/{},
- {elementType, elementType}, {loc, loc});
- b.create<YieldOp>(loc, bodyBlock->getArgument(0));
- return transposeOp;
-}
-
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
auto memrefTypeTo = cast<MemRefType>(to.getType());
#ifndef NDEBUG
diff --git a/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir b/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir
index ae63ed5f1a410a..a6943cf338d42a 100644
--- a/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir
@@ -115,8 +115,8 @@ func.func @pad_and_hoist_lhs_transpose(
// BUILD-PACKING-LOOP-NEST: %[[PACKED:.*]] = scf.for %{{.*}} -> (tensor<?x12x5xf32>) {
// BUILD-PACKING-LOOP-NEST: tensor.pad %{{.*}}
// BUILD-PACKING-LOOP-NEST: : tensor<?x12xf32> to tensor<5x12xf32>
- // BUILD-PACKING-LOOP-NEST: linalg.generic
- // BUILD-PACKING-LOOP-NEST: -> tensor<12x5xf32>
+ // BUILD-PACKING-LOOP-NEST: linalg.transpose
+ // BUILD-PACKING-LOOP-NEST: ins({{.*}} : tensor<5x12xf32>) outs({{.*}} : tensor<12x5xf32>)
// BUILD-PACKING-LOOP-NEST: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1]
// BUILD-PACKING-LOOP-NEST-SAME: : tensor<12x5xf32> into tensor<?x12x5xf32>
// BUILD-PACKING-LOOP-NEST: scf.for %{{.*}} -> (tensor<24x25xf32>)
diff --git a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
index 499d9904c06b94..e075ff57666b0c 100644
--- a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
@@ -123,17 +123,17 @@ func.func @pad_and_hoist_lhs_transpose(
-> tensor<24x25xf32>
{
// CHECK: %[[PACKED:.*]] = scf.for %{{.*}} -> (tensor<5x12x5xf32>) {
- // CHECK: tensor.pad %{{.*}}
+ // CHECK: %[[PAD:.*]] = tensor.pad %{{.*}}
// CHECK: : tensor<?x12xf32> to tensor<5x12xf32>
- // CHECK: linalg.generic
- // CHECK: -> tensor<12x5xf32>
+ // CHECK: linalg.transpose
+ // CHECK: ins(%[[PAD]] : tensor<5x12xf32>) outs(%{{.*}} : tensor<12x5xf32>)
// CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1]
// CHECK-SAME: : tensor<12x5xf32> into tensor<5x12x5xf32>
// CHECK: scf.for %{{.*}} -> (tensor<24x25xf32>) {
// CHECK: %[[PADDED:.*]] = tensor.extract_slice %[[PACKED]][%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1]
// CHECK-SAME: : tensor<5x12x5xf32> to tensor<12x5xf32>
- // CHECK: %[[TRANSPOSED:.*]] = linalg.generic
- // CHECK: -> tensor<5x12xf32>
+ // CHECK: %[[TRANSPOSED:.*]] = linalg.transpose ins(%[[PADDED]] : tensor<12x5xf32>)
+ // CHECK: outs(%{{.*}} : tensor<5x12xf32>
// CHECK: linalg.matmul ins(%[[TRANSPOSED]]
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
func.return %0 : tensor<24x25xf32>
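To make the readability point concrete, here is a minimal before/after sketch of the transpose this hoisting now emits. The 5x12 -> 12x5 shapes follow the pad_and_hoist_lhs_transpose test above; the value names %padded and %init are illustrative, not taken from the PR.

// Before: the transpose built by makeTransposeOp, a linalg.generic with the
// inverted permutation map on the input and an identity map on the output.
#perm = affine_map<(d0, d1) -> (d1, d0)>
#id = affine_map<(d0, d1) -> (d0, d1)>
%t_generic = linalg.generic
    {indexing_maps = [#perm, #id], iterator_types = ["parallel", "parallel"]}
    ins(%padded : tensor<5x12xf32>) outs(%init : tensor<12x5xf32>) {
^bb0(%in: f32, %out: f32):
  linalg.yield %in : f32
} -> tensor<12x5xf32>

// After: the same computation as the named op the updated tests expect.
%t_named = linalg.transpose ins(%padded : tensor<5x12xf32>)
                            outs(%init : tensor<12x5xf32>) permutation = [1, 0]

Beyond reading better, the named form is easier for downstream patterns to match than a generic whose indexing maps must be inspected.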
This makes sense to me.
@MaheshRavishankar @ftynse would that break anything on your side?
Thanks for checking. I guess it's OK for us, but we'll know for sure when this gets pulled in.
Hi, thanks for the approval. I also noticed that, similarly, a method makeMemRefCopyOp defined under makeTransposeOp is dead code, and linalg.copy should be preferred.
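To illustrate that follow-up point, a hedged sketch with made-up operands (%from, %to) rather than code from this PR: the memref copy that makeMemRefCopyOp builds as a linalg.generic, next to the named op that expresses the same thing.

// Copy written as a linalg.generic over memrefs, roughly what
// makeMemRefCopyOp produces: identity maps on both sides, yield the input.
#id = affine_map<(d0, d1) -> (d0, d1)>
linalg.generic {indexing_maps = [#id, #id],
                iterator_types = ["parallel", "parallel"]}
    ins(%from : memref<4x8xf32>) outs(%to : memref<4x8xf32>) {
^bb0(%in: f32, %out: f32):
  linalg.yield %in : f32
}

// The equivalent named op, which still tiles, lowers to loops, and vectorizes.
linalg.copy ins(%from : memref<4x8xf32>) outs(%to : memref<4x8xf32>)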