swiftlang
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td
Lines changed: 18 additions & 0 deletions
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
Lines changed: 131 additions & 0 deletions
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
Lines changed: 1 addition & 0 deletions
@@ -459,6 +459,24 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
             }));
       }]
     >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Return the position of buffer in inputs + outputs list
+      }],
+      /*retTy=*/"Optional<unsigned>",
+      /*methodName=*/"getIndexOfInputAndOutputBuffer",
+      /*args=*/(ins "Value":$value),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        // Inputs are numbered first: an input's index is returned unchanged.
+        if (Optional<unsigned> inputIndex = getIndexOfInput(value))
+          return inputIndex;
+        // Output buffers come next, offset by the number of inputs.
+        if (Optional<unsigned> outputIndex = getIndexOfOutputBuffer(value))
+          return $_op.getNumInputs() + *outputIndex;
+        return llvm::None;
+      }]
+    >,
 
     //===------------------------------------------------------------------===//
     // Other interface methods.
 
@@ -18,6 +18,7 @@
 namespace mlir {
 namespace linalg {
 
+struct LinalgFusionOptions;
 struct LinalgTilingOptions;
 
 //===----------------------------------------------------------------------===//
@@ -30,6 +31,14 @@ struct TiledLinalgOp {
   SmallVector<Operation *, 8> loops;
 };
 
+struct TiledAndFusedLinalgOps { // Result type of tileAndFuseLinalgOps().
+  LinalgOp op; ///< Presumably the tiled root (consumer) op -- TODO confirm.
+  SmallVector<LinalgOp, 1> fusedProducers; ///< Presumably the fused copies.
+  SmallVector<LinalgOp, 1> originalProducers; ///< Presumably pre-fusion ops.
+  SmallVector<Operation *, 4> fusedLoops; ///< Presumably tile+fuse loops.
+  SmallVector<Operation *, 4> unfusedLoops; ///< Presumably tile-only loops.
+};
+
 /// Populates patterns for vectorization of all ConvN-D ops.
 void populateConvVectorizationPatterns(
     MLIRContext *context, SmallVectorImpl<OwningRewritePatternList> &patterns,
@@ -53,6 +62,71 @@ void populateConvVectorizationPatterns(
 Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
                                      const LinalgTilingOptions &options);
 
+/// Tile and fuse the `op` with its producers. Tiling and fusion proceed in
+/// three steps:
+/// - Find tile loops that are fusable with its producer tile loops (a.k.a. tile
+///   + fuse loops).
+/// - Tile just these loops of the consumer (root operation) and fuse with
+///   the producer.
+/// - Tile again the tiled consumer operation produced above to do the rest of
+///   the tiling specified by the `tilingOptions`.
+///
+/// For example, consider the sequence of matmuls below:
+///
+///   linalg.matmul ins(%arg0, %arg1 : memref<256x32xf32>, memref<32x32xf32>)
+///                 outs(%arg2 : memref<256x32xf32>)
+///   linalg.matmul ins(%arg2, %arg3 : memref<256x32xf32>, memref<32x32xf32>)
+///                 outs(%arg4 : memref<256x32xf32>)
+///
+/// It is legal to fuse the RAW dependence (through %arg2) by only fusing the
+/// matmuls row-wise. For example, the fused computation for the above is shown
+/// below. The outer `scf.parallel` loop is the "fused" loop obtained by tiling
+/// along the rows of the matrix. The entire rows of the first matmul operation
+/// need to be computed before they can be used for the second matmul. The
+/// second matmul is further tiled (similar to normal tiling).
+///
+/// #map0 = affine_map<(d0, d1)[s0] -> (d0 * 32 + s0 + d1)>
+/// #map1 = affine_map<(d0, d1) -> (d0 * 32 + d1)>
+/// scf.parallel (%arg5) = (%c0) to (%c256) step (%c16) {
+///   %0 = subview %arg2[%arg5, 0] [16, 32] [1, 1]
+///     : memref<256x32xf32> to memref<16x32xf32, #map0>
+///   %1 = subview %arg4[%arg5, 0] [16, 32] [1, 1]
+///     : memref<256x32xf32> to memref<16x32xf32, #map0>
+///   %2 = subview %arg0[%arg5, 0] [16, 32] [1, 1]
+///     : memref<256x32xf32> to memref<16x32xf32, #map0>
+///   %3 = subview %arg1[0, 0] [32, 32] [1, 1]
+///     : memref<32x32xf32> to memref<32x32xf32, #map1>
+///   linalg.matmul
+///     ins(%2, %3 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>)
+///     outs(%0 : memref<16x32xf32, #map0>)
+///   scf.parallel (%arg6) = (%c0) to (%c32) step (%c8) {
+///     scf.for %arg7 = %c0 to %c32 step %c4 {
+///       %4 = subview %0[0, %arg7] [16, 4] [1, 1]
+///         : memref<16x32xf32, #map0> to memref<16x4xf32, #map0>
+///       %5 = subview %arg3[%arg7, %arg6] [4, 8] [1, 1]
+///         : memref<32x32xf32> to memref<4x8xf32, #map0>
+///       %6 = subview %1[0, %arg6] [16, 8] [1, 1]
+///         : memref<16x32xf32, #map0> to memref<16x8xf32, #map0>
+///       linalg.matmul
+///         ins(%4, %5 : memref<16x4xf32, #map0>, memref<4x8xf32, #map0>)
+///         outs(%6 : memref<16x8xf32, #map0>)
+///     }
+///     scf.yield
+///   }
+///   scf.yield
+/// }
+///
+/// The following tiling options are handled differently in tile+fuse (compared
+/// to tile only):
+/// - Interchange of the tiling loops is not supported right now.
+/// - Distribution is only done for the tile+fuse loops. The tiled loops
+///   generated by the second tiling are not distributed.
+Optional<TiledAndFusedLinalgOps>
+tileAndFuseLinalgOps(PatternRewriter &rewriter, LinalgOp op,
+                     const LinalgDependenceGraph &dependenceGraph,
+                     const LinalgTilingOptions &tilingOptions,
+                     const LinalgFusionOptions &fusionOptions);
+
 /// Interchanges the `iterator_types` and `iterator_maps` dimensions of `op`.
 /// This is an in-place transformation controlled by `interchangeVector`.
 /// An empty vector is interpreted as the identity permutation and the
@@ -323,6 +397,63 @@ struct LinalgTilingPattern : public LinalgBaseTilingPattern {
   }
 };
 
+struct LinalgFusionOptions {
+  /// Indices of the operands to fuse along. While this is left unset (`None`),
+  /// a single fusion is performed: the pattern returns after the first fusion.
+  Optional<DenseSet<unsigned>> indicesToFuse = None;
+  LinalgFusionOptions &setIndicesToFuse(ArrayRef<int64_t> operands) {
+    indicesToFuse.emplace();
+    for (int64_t operandIndex : operands) indicesToFuse->insert(operandIndex);
+    return *this;
+  }
+};
+
+struct LinalgBaseTileAndFusePattern : public RewritePattern {
+  LinalgBaseTileAndFusePattern(StringRef opName, MLIRContext *context,
+                               const LinalgDependenceGraph &dependenceGraph,
+                               LinalgTilingOptions tilingOptions,
+                               LinalgFusionOptions fusionOptions,
+                               LinalgMarker marker = LinalgMarker(),
+                               LinalgMarker fusedOpMarker = LinalgMarker(),
+                               LinalgMarker originalOpMarker = LinalgMarker(),
+                               PatternBenefit benefit = 1);
+  LogicalResult matchAndRewrite(Operation *op,
+                                PatternRewriter &rewriter) const override;
+
+private:
+  /// Dependence graph needed for fusion; stored by reference, not owned.
+  const LinalgDependenceGraph &dependenceGraph;
+  /// Options to control tiling.
+  LinalgTilingOptions tilingOptions;
+  /// Options to control fusion.
+  LinalgFusionOptions fusionOptions;
+  /// Marker to control application of the pattern.
+  LinalgMarker marker;
+  /// Marker set on the fused op after tile and fuse.
+  LinalgMarker fusedOpMarker;
+  /// The dependenceGraph is not modifiable, i.e. if the Linalg operations used
+  /// to build the dependence graph change, the dependenceGraph currently has
+  /// to be recomputed. To avoid invalidating the dependenceGraph while the
+  /// transformation happens, the original producer can be tagged with a marker
+  /// that can later be used to delete the original operations.
+  LinalgMarker originalOpMarker;
+};
+
+template <typename OpTy> // Forwards OpTy::getOperationName() as `opName`.
+struct LinalgTileAndFusePattern : public LinalgBaseTileAndFusePattern {
+  LinalgTileAndFusePattern(MLIRContext *context,
+                           const LinalgDependenceGraph &dependenceGraph,
+                           LinalgTilingOptions tilingOptions,
+                           LinalgFusionOptions fusionOptions,
+                           LinalgMarker marker = LinalgMarker(),
+                           LinalgMarker fusedOpMarker = LinalgMarker(),
+                           LinalgMarker originalOpMarker = LinalgMarker(),
+                           PatternBenefit benefit = 1)
+      : LinalgBaseTileAndFusePattern(
+            OpTy::getOperationName(), context, dependenceGraph, tilingOptions,
+            fusionOptions, marker, fusedOpMarker, originalOpMarker, benefit) {}
+};
+
 ///
 /// Linalg interchange patterns.
 ///
 
@@ -10,6 +10,7 @@
 #define MLIR_DIALECT_LINALG_UTILS_H_
 
 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Linalg/EDSC/Builders.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/Dialect/SCF/SCF.h"