llvm
diff --git a/‎mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
Lines changed: 30 additions & 38 deletions b/‎mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
Lines changed: 30 additions & 38 deletions
@@ -37,14 +37,6 @@ struct TiledLinalgOp {
   SmallVector<Value, 4> tensorResults;
 };
 
-struct TiledAndFusedLinalgOps {
-  LinalgOp op;
-  SmallVector<LinalgOp, 1> fusedProducers;
-  SmallVector<LinalgOp, 1> originalProducers;
-  SmallVector<Operation *, 4> fusedLoops;
-  SmallVector<Operation *, 4> unfusedLoops;
-};
-
 /// Populates patterns for vectorization of all ConvN-D ops.
 void populateConvVectorizationPatterns(
     MLIRContext *context, SmallVectorImpl<OwningRewritePatternList> &patterns,
@@ -73,14 +65,11 @@ void populateLinalgBufferizePatterns(MLIRContext *context,
 Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
                                      const LinalgTilingOptions &options);
 
-/// Tile and fuse the `op` with its producers. The tile and fuse proceeds in
-/// three steps
-/// - Find tile loops that are fusable with its producer tile loops (a.k.a. tile
-///   + fuse loops).
-/// - Tile just these loops of the consumer (root operation) and fuse with
-///   the producer.
-/// - Tile again the tiled consumer operation produced above to do rest of
-///   the tiling specified by the `tilingOptions`.
+/// Fuse a sequence of linalg operations (`ops`) using tile-and-fuse. This
+/// proceeds as follows:
+/// - Find outer parallel loops in these ops that can be fused.
+/// - Tile fusable outer parallel loops of the last operation in the sequence.
+/// - Fuse the remaining operations with the tiled operation.
 ///
 /// For example, consider the sequence of matmul below
 ///
@@ -107,36 +96,39 @@ Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
 ///     : memref<256x32xf32> to memref<16x32xf32, #map0>
 ///   %3 = subview %arg1[0, 0] [32, 32] [1, 1]
 ///     : memref<32x32xf32> to memref<32x32xf32, #map1>
+///   %4 = subview %arg3[0, 0] [32, 32] [1, 1]
+///     : memref<32x32xf32> to memref<32x32xf32, #map1>
 ///   linalg.matmul
 ///     ins(%2, %3 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>)
 ///     outs(%0 : memref<16x32xf32, #map0>)
-///   scf.parallel (%arg6) = (%c0) to (%c32) step (%c8) {
-///   scf.for %arg7 = %c0 to %c32 step %c4 {
-///     %4 = subview %0[0, %arg7] [16, 4] [1, 1]
-///       : memref<16x32xf32, #map0> to memref<16x4xf32, #map0>
-///     %5 = subview %arg3[%arg7, %arg6] [4, 8] [1, 1]
-///       : memref<32x32xf32> to memref<4x8xf32, #map0>
-///     %6 = subview %1[0, %arg6] [16, 8] [1, 1]
-///       : memref<16x32xf32, #map0> to memref<16x8xf32, #map0>
-///     linalg.matmul
-///       ins(%4, %5 : memref<16x4xf32, #map0>, memref<4x8xf32, #map0>)
-///       outs(%6 : memref<16x8xf32, #map0>)
-///     }
-///     scf.yield
-///   }
-///   scf.yield
+///   linalg.matmul
+///     ins(%0, %4 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>)
+///     outs(%1 : memref<16x32xf32, #map0>)
 /// }
 ///
-/// The following tiling options are handled differently in tile+fuse (compared
-/// to tile only)
+/// `tilingOptions` are used to tile the corresponding operation in `ops` (the
+/// size of the former should be the same as the size of the latter). Based on
+/// how tile+fuse is implemented, the fused loops are generated based on the
+/// last operation in the sequence. For example, the tile sizes for the fused
+/// loops are obtained from `tilingOptions.back()`. The following tiling options
+/// are handled differently in tile+fuse (compared to tile only):
 /// - Interchange of the tiling loops is not supported right now.
-/// - Distribution is only done for the tile+fuse loops. The tiled loops
-///   generated by the second tiling is not distributed.
+/// - Only the fused loops are distributed.
+struct TiledAndFusedLinalgOps {
+  /// Operation obtained by tiling the last operation in sequence of `ops`
+  /// passed to `tileAndFuseLinalgOps`.
+  LinalgOp op;
+  /// The dimensions of the loops that are fused.
+  std::set<unsigned> fusedLoopDims;
+  /// The generated fused operations (created within the fused loops).
+  SmallVector<LinalgOp, 1> fusedProducers;
+  /// The fused loops generated.
+  SmallVector<Operation *, 4> fusedLoops;
+};
 Optional<TiledAndFusedLinalgOps>
-tileAndFuseLinalgOps(PatternRewriter &rewriter, LinalgOp op,
+tileAndFuseLinalgOps(OpBuilder &builder, ArrayRef<LinalgOp> ops,
                      const LinalgDependenceGraph &dependenceGraph,
-                     const LinalgTilingOptions &tilingOptions,
-                     const LinalgFusionOptions &fusionOptions);
+                     const LinalgTilingOptions &tilingOptions);
 
 /// Interchanges the `iterator_types` and `iterator_maps` dimensions of `op`.
 /// This is an in-place transformation controlled by `interchangeVector`.