Commit 6529c9a

[mlir][linalg][NFC] Remove linalg subset hoisting (#70636)
Remove `SubsetHoisting.cpp` and migrate all remaining uses to the newly added loop-invariant subset hoisting transform in `mlir/Transforms`.
1 parent: b9fe461
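
For downstream users of the removed API, the migration is mechanical. Here is a minimal sketch, not the commit's own code: the helper name is hypothetical, and the cast mirrors the HoistPadding.cpp change below, since `hoistLoopInvariantSubsets` is loop-agnostic and may return a new loop op.

#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h"

using namespace mlir;

// Hypothetical drop-in for the removed
// linalg::hoistRedundantSubsetExtractInsert(rewriter, forOp).
scf::ForOp hoistSubsets(RewriterBase &rewriter, scf::ForOp forOp) {
  // The generic transform may replace the loop; use the returned op,
  // not the original `forOp`, from here on.
  return cast<scf::ForOp>(hoistLoopInvariantSubsets(rewriter, forOp));
}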

File tree

5 files changed: 3 additions & 668 deletions

mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h

Lines changed: 0 additions & 103 deletions
@@ -45,109 +45,6 @@ namespace linalg {
 /// when used on distributed loops with memref semantics!
 void hoistRedundantVectorTransfers(func::FuncOp func);
 
-/// Greedily hoist redundant subset extract/insert operations on tensors outside
-/// of `forOp`. The logic follows:
-/// 1. Look for a write walking back from the `forOp` yield.
-/// 2. Check the uses of the matching block argument and look for a matching
-///    read (i.e. extract_slice of transfer_read) with matching indices.
-/// 3. In the case of a transfer_write, we can bypass other non-conflicting
-///    operations and find more hoisting opportunities.
-/// 4. Hoist the read/write pair and update the tensor SSA links.
-///
-/// Return the unmodified `forOp` if no hoisting occured.
-/// Return a new scf::ForOp if hoisting on tensors occured.
-///
-/// After this transformation the returned scf::ForOp may have unused arguments
-/// that can be removed by application of canonicalization patterns.
-///
-/// Example:
-/// ========
-/// IR Resembling:
-///
-/// ```
-/// %0 = scf.for %i = %l to %u step %s iter_args(%a0 = %t0)->(tensor<10xf32>) {
-/// %1 = scf.for %j = %l to %u step %s iter_args(%a6 = %a0)->(tensor<10xf32>) {
-/// %e = tensor.extract_slice %a6[%i][%sz][1]: tensor<10xf32> to tensor<?xf32>
-/// %r = vector.transfer_read %e[%c0], %cst: tensor<?xf32>, vector<4xf32>
-/// %u = "some_use"(%r) : (vector<4xf32>) -> vector<4xf32>
-/// %w = vector.transfer_write %u, %e[%c0] : vector<4xf32>, tensor<?xf32>
-/// %st = tensor.insert_slice %w into %a6[%i][%sz][1]
-///   : tensor<?xf32> into tensor<10xf32>
-/// scf.yield %st: tensor<10xf32>
-/// }
-/// scf.yield %1: tensor<10xf32>
-/// }
-/// ```
-///
-/// Progressively hoists to:
-///
-/// ```
-/// %0 = scf.for %i = %l to %u step %s iter_args(%a0 = %t0) -> (tensor<10xf32>){
-/// %e = tensor.extract_slice %a0[%i][%sz][1]: tensor<10xf32> to tensor<?xf32>
-/// %1:2 = scf.for %j = %l to %u step %s iter_args(%a6 = a0, %a7 = %e)
-///   -> (tensor<10xf32>, tensor<?xf32>) {
-/// %r = vector.transfer_read %a7[%c0], %cst: tensor<?xf32>, vector<4xf32>
-/// %u = "some_use"(%r) : (vector<4xf32>) -> vector<4xf32>
-/// %w = vector.transfer_write %u, %a7[%c0] : vector<4xf32>, tensor<?xf32>
-/// scf.yield %a6, %w: tensor<10xf32>, tensor<?xf32>
-/// }
-/// %st = tensor.insert_slice %1#1 into %1#0[%i][%sz][1]
-///   : tensor<?xf32> into tensor<10xf32>
-/// scf.yield %1: tensor<10xf32>
-/// }
-/// ```
-///
-/// and
-///
-/// ```
-/// %0 = scf.for %i = %l to %u step %s iter_args(%a0 = %t0) -> (tensor<10xf32>){
-/// %e = tensor.extract_slice %a0[%i][%sz][1]: tensor<10xf32> to tensor<?xf32>
-/// %r = vector.transfer_read %a7[%c0], %cst: tensor<?xf32>, vector<4xf32>
-/// %1:3 = scf.for %j = %l to %u step %s iter_args(%a6 = a0, %a7 = %e, %a7 = r)
-///   -> (tensor<10xf32>, tensor<?xf32>, vector<4xf32>) {
-/// %u = "some_use"(%r) : (vector<4xf32>) -> vector<4xf32>
-/// scf.yield %a6, %a7, %u: tensor<10xf32>, tensor<?xf32>, vector<4xf32>
-/// }
-/// %w = vector.transfer_write %1#2, %1#1[%c0] : vector<4xf32>, tensor<?xf32>
-/// %st = tensor.insert_slice %w into %1#0[%i][%sz][1]
-///   : tensor<?xf32> into tensor<10xf32>
-/// scf.yield %1: tensor<10xf32>
-/// }
-/// ```
-///
-/// It can then canonicalize to:
-///
-/// ```
-/// %0 = scf.for %i = %l to %u step %s iter_args(%a0 = %t0) -> (tensor<10xf32>){
-/// %e = tensor.extract_slice %a0[%i][%sz][1]: tensor<10xf32> to tensor<?xf32>
-/// %r = vector.transfer_read %a7[%c0], %cst: tensor<?xf32>, vector<4xf32>
-/// %1 = scf.for %j = %l to %u step %s iter_args(%a7 = r)
-///   -> (tensor<10xf32>, tensor<?xf32>, vector<4xf32>) {
-/// %u = "some_use"(%r) : (vector<4xf32>) -> vector<4xf32>
-/// scf.yield %u: vector<4xf32>
-/// }
-/// %w = vector.transfer_write %1, %e[%c0] : vector<4xf32>, tensor<?xf32>
-/// %st = tensor.insert_slice %w into %a0[%i][%sz][1]
-///   : tensor<?xf32> into tensor<10xf32>
-/// scf.yield %1: tensor<10xf32>
-/// }
-/// ```
-///
-// TODO: This should be further generalized along a few different axes:
-// - Other loops than scf.ForOp that operate on tensors (both sequential and
-//   parallel loops).
-// - Other subset extract/insert pairs than tensor.extract/insert_slice and
-//   vector.transfer_read/write.
-// - More general areSubsetDisjoint analysis/interface to work across all
-//   subset op types and allow bypassing non-WAW-conflicting operations in
-//   more cases.
-scf::ForOp hoistRedundantSubsetExtractInsert(RewriterBase &rewriter,
-                                             scf::ForOp forOp);
-
-/// Call into `hoistRedundantSubsetInsertExtract` without a RewriterBase.
-// TODO: obsolete and should be retired
-void hoistRedundantVectorTransfersOnTensor(func::FuncOp func);
-
 } // namespace linalg
 } // namespace mlir

mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
@@ -27,7 +27,6 @@ add_mlir_dialect_library(MLIRLinalgTransforms
   Specialize.cpp
   Split.cpp
   SplitReduction.cpp
-  SubsetHoisting.cpp
   SubsetInsertionOpInterfaceImpl.cpp
   SwapExtractSliceWithFillPatterns.cpp
   Tiling.cpp

mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp

Lines changed: 3 additions & 2 deletions
@@ -25,6 +25,7 @@
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Interfaces/DestinationStyleOpInterface.h"
+#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/Debug.h"
 
@@ -292,8 +293,8 @@ void HoistPaddingAnalysis::enableHoistPadding(RewriterBase &rewriter) {
   // enclosing loop, try to apply hoisting on this outermost loop.
   // TODO: we may want finer-grained hoisting of only that particular `sliceOp`.
   if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.getSource())) {
-    outermostEnclosingForOp =
-        hoistRedundantSubsetExtractInsert(rewriter, outermostEnclosingForOp);
+    outermostEnclosingForOp = cast<scf::ForOp>(
+        hoistLoopInvariantSubsets(rewriter, outermostEnclosingForOp));
   }
 }
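
The call-site change above is the one subtlety in this file: the removed linalg entry point was specific to `scf.for`, while its replacement is loop-agnostic, hence the cast back to `scf::ForOp`. Side by side (the first declaration is verbatim from the Hoisting.h diff above; the second is quoted from memory of `LoopInvariantCodeMotionUtils.h` and should be checked against the header):

// Removed (mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h):
scf::ForOp hoistRedundantSubsetExtractInsert(RewriterBase &rewriter,
                                             scf::ForOp forOp);

// Replacement (mlir/Transforms/LoopInvariantCodeMotionUtils.h): takes any
// loop implementing LoopLikeOpInterface and returns the (possibly new) loop.
LoopLikeOpInterface hoistLoopInvariantSubsets(RewriterBase &rewriter,
                                              LoopLikeOpInterface loopLike);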

mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp

Lines changed: 0 additions & 9 deletions
@@ -43,15 +43,6 @@ using llvm::dbgs;
 using namespace mlir;
 using namespace mlir::linalg;
 
-void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) {
-  IRRewriter rewriter(func->getContext());
-  // TODO: walking in some reverse / inside-out order would be more efficient
-  // and would capture more cases.
-  func.walk([&](scf::ForOp forOp) {
-    hoistRedundantSubsetExtractInsert(rewriter, forOp);
-  });
-}
-
 static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
                                 LoopLikeOpInterface loop) {
   Value source = transferRead.getSource();

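`hoistRedundantVectorTransfersOnTensor` is deleted without a direct in-tree replacement; per the commit message, all remaining uses have been migrated. A downstream caller could approximate the old function-wide behavior with the new utility. A minimal sketch mirroring the deleted walk (the function name is hypothetical, and the deleted TODO's caveat still applies: a reverse / inside-out traversal could catch more cases):

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h"

using namespace mlir;

// Hypothetical stand-in for the deleted wrapper: hoists loop-invariant
// subset extract/insert pairs out of every scf.for in `func`.
static void hoistSubsetsOnFunc(func::FuncOp func) {
  IRRewriter rewriter(func->getContext());
  func.walk([&](scf::ForOp forOp) {
    // May replace `forOp`; the result is deliberately ignored, as in the
    // deleted code.
    (void)hoistLoopInvariantSubsets(rewriter, forOp);
  });
}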