Skip to content

Commit f8284d2

Browse files
author
MaheshRavishankar
committed
[mlir][Linalg] Fuse sequence of Linalg operation (on buffers)
Enhance the tile+fuse logic to allow fusing a sequence of operations. Differential Revision: https://reviews.llvm.org/D90991
1 parent 8b525c9 commit f8284d2

File tree

7 files changed

+581
-314
lines changed

7 files changed

+581
-314
lines changed

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Lines changed: 30 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,6 @@ struct TiledLinalgOp {
3737
SmallVector<Value, 4> tensorResults;
3838
};
3939

40-
struct TiledAndFusedLinalgOps {
41-
LinalgOp op;
42-
SmallVector<LinalgOp, 1> fusedProducers;
43-
SmallVector<LinalgOp, 1> originalProducers;
44-
SmallVector<Operation *, 4> fusedLoops;
45-
SmallVector<Operation *, 4> unfusedLoops;
46-
};
47-
4840
/// Populates patterns for vectorization of all ConvN-D ops.
4941
void populateConvVectorizationPatterns(
5042
MLIRContext *context, SmallVectorImpl<OwningRewritePatternList> &patterns,
@@ -73,14 +65,11 @@ void populateLinalgBufferizePatterns(MLIRContext *context,
7365
Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
7466
const LinalgTilingOptions &options);
7567

76-
/// Tile and fuse the `op` with its producers. The tile and fuse proceeds in
77-
/// three steps
78-
/// - Find tile loops that are fusable with its producer tile loops (a.k.a. tile
79-
/// + fuse loops).
80-
/// - Tile just these loops of the consumer (root operation) and fuse with
81-
/// the producer.
82-
/// - Tile again the tiled consumer operation produced above to do rest of
83-
/// the tiling specified by the `tilingOptions`.
68+
/// Fuse a sequence of linalg operations (`ops`) using tile-and-fuse. This
69+
/// proceeds as follows:
70+
/// - Find outer parallel loops in these ops that can be fused.
71+
/// - Tile fusable outer parallel loops of the last operation in the sequence.
72+
/// - Fuse the remaining operations with the tiled operation.
8473
///
8574
/// For example, consider the sequence of matmul below
8675
///
@@ -107,36 +96,39 @@ Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
10796
/// : memref<256x32xf32> to memref<16x32xf32, #map0>
10897
/// %3 = subview %arg1[0, 0] [32, 32] [1, 1]
10998
/// : memref<32x32xf32> to memref<32x32xf32, #map1>
99+
/// %4 = subview %arg3[0, 0] [32, 32] [1, 1]
100+
/// : memref<32x32xf32> to memref<32x32xf32, #map1>
110101
/// linalg.matmul
111102
/// ins(%2, %3 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>)
112103
/// outs(%0 : memref<16x32xf32, #map0>)
113-
/// scf.parallel (%arg6) = (%c0) to (%c32) step (%c8) {
114-
/// scf.for %arg7 = %c0 to %c32 step %c4 {
115-
/// %4 = subview %0[0, %arg7] [16, 4] [1, 1]
116-
/// : memref<16x32xf32, #map0> to memref<16x4xf32, #map0>
117-
/// %5 = subview %arg3[%arg7, %arg6] [4, 8] [1, 1]
118-
/// : memref<32x32xf32> to memref<4x8xf32, #map0>
119-
/// %6 = subview %1[0, %arg6] [16, 8] [1, 1]
120-
/// : memref<16x32xf32, #map0> to memref<16x8xf32, #map0>
121-
/// linalg.matmul
122-
/// ins(%4, %5 : memref<16x4xf32, #map0>, memref<4x8xf32, #map0>)
123-
/// outs(%6 : memref<16x8xf32, #map0>)
124-
/// }
125-
/// scf.yield
126-
/// }
127-
/// scf.yield
104+
/// linalg.matmul
105+
/// ins(%0, %4 : memref<16x4xf32, #map0>, memref<4x8xf32, #map0>)
106+
/// outs(%1 : memref<16x8xf32, #map0>)
128107
/// }
129108
///
130-
/// The following tiling options are handled differently in tile+fuse (compared
131-
/// to tile only)
109+
/// `tilingOptions` are used to tile the corresponding operation in `ops` (the
110+
/// size of the former should be same as size of the latter). Based on how
111+
/// tile+fuse is implemented, the fused loops are generated based on the last
112+
/// operation in the sequence. For example, the tile sizes for the fused loops
113+
/// are obtained from `tilingOptions.back()`. The following tiling options are
114+
/// handled differently in tile+fuse (compared to tile only)
132115
/// - Interchange of the tiling loops is not supported right now.
133-
/// - Distribution is only done for the tile+fuse loops. The tiled loops
134-
/// generated by the second tiling is not distributed.
116+
/// - Only the fused loops are distributed.
117+
struct TiledAndFusedLinalgOps {
118+
/// Operation obtained by tiling the last operation in sequence of `ops`
119+
/// passed to `tileAndFuseLinalgOps`.
120+
LinalgOp op;
121+
/// The dimensions of the loops that are fused.
122+
std::set<unsigned> fusedLoopDims;
123+
/// The generated fused operations (created within the fused loops).
124+
SmallVector<LinalgOp, 1> fusedProducers;
125+
/// The fused loops generated.
126+
SmallVector<Operation *, 4> fusedLoops;
127+
};
135128
Optional<TiledAndFusedLinalgOps>
136-
tileAndFuseLinalgOps(PatternRewriter &rewriter, LinalgOp op,
129+
tileAndFuseLinalgOps(OpBuilder &builder, ArrayRef<LinalgOp> ops,
137130
const LinalgDependenceGraph &dependenceGraph,
138-
const LinalgTilingOptions &tilingOptions,
139-
const LinalgFusionOptions &fusionOptions);
131+
const LinalgTilingOptions &tilingOptions);
140132

141133
/// Interchanges the `iterator_types` and `iterator_maps` dimensions of `op`.
142134
/// This is an in-place transformation controlled by `interchangeVector`.

0 commit comments

Comments
 (0)