[mlir][bufferization] Add TensorCopyInsertion pass

matthias-springer · matthias-springer · commit 3b2004e16b30 · 2022-06-09T21:55:52.000+02:00
This pass runs the One-Shot Analysis to find out which tensor OpOperands must bufferize out-of-place. It then rewrites those tensor OpOperands to explicit allocations with a copy in the form of `bufferization.alloc_tensor`. The resulting IR can then be bufferized without having to care about read-after-write conflicts. This change makes it possible to connect One-Shot Analysis to other bufferizations such as the sparse compiler. Differential Revision: https://reviews.llvm.org/D126573
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -78,6 +78,11 @@ std::unique_ptr<Pass> createAllocTensorEliminationPass();
 /// Create a pass that bufferizes ops from the bufferization dialect.
 std::unique_ptr<Pass> createBufferizationBufferizePass();
 
+/// Create a pass that resolves out-of-place tensor OpOperands with copies.
+std::unique_ptr<Pass> createTensorCopyInsertionPass();
+/// Same as above, but the analysis is configured by the given `options`
+/// instead of by the pass's own command-line options.
+std::unique_ptr<Pass>
+createTensorCopyInsertionPass(const OneShotBufferizationOptions &options);
+
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -324,6 +324,25 @@ def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "func::FuncOp"> {
   ];
 }
 
+def TensorCopyInsertion : Pass<"tensor-copy-insertion"> {
+  let summary = "Make all tensor IR inplaceable by inserting copies";
+  let description = [{
+    This pass runs One-Shot Analysis and inserts copies for all OpOperands that
+    were decided to bufferize out-of-place. After running this pass, a
+    bufferization can write to buffers directly (without making copies) and no
+    longer has to care about potential read-after-write conflicts.
+  }];
+  // These options are copied into the One-Shot Bufferize options when the pass
+  // is created without explicit options.
+  let options = [
+    Option<"allowReturnAllocs", "allow-return-allocs", "bool",
+           /*default=*/"false",
+           "Allows returning/yielding new allocations from a block.">,
+    Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
+           "bool", /*default=*/"false",
+           "Bufferize function boundaries (experimental).">,
+  ];
+  let constructor = "mlir::bufferization::createTensorCopyInsertionPass()";
+}
+
 def AllocTensorElimination : Pass<"eliminate-alloc-tensors"> {
   let summary = "Try to eliminate all alloc_tensor ops.";
   let description = [{
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/TensorCopyInsertion.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/TensorCopyInsertion.h
@@ -0,0 +1,26 @@
+//===- TensorCopyInsertion.h - Resolve Bufferization Conflicts w/ Copies --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_TENSORCOPYINSERTION_H
+#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_TENSORCOPYINSERTION_H
+
+#include "mlir/IR/Operation.h"
+
+namespace mlir {
+namespace bufferization {
+class AnalysisState;
+struct OneShotBufferizationOptions;
+
+/// Run One-Shot Analysis on `op` as configured by `options` and then resolve
+/// all tensor OpOperands that were decided to bufferize out-of-place by
+/// inserting explicit tensor copies (`bufferization.alloc_tensor` with a copy
+/// operand).
+///
+/// If `options.bufferizeFunctionBoundaries` is set, the module analysis is
+/// used and `op` must be a ModuleOp. If `options.testAnalysisOnly` is set,
+/// only the analysis is run and no copies are inserted.
+///
+/// Returns failure if the analysis or the copy insertion fails.
+LogicalResult insertTensorCopies(Operation *op,
+                                 const OneShotBufferizationOptions &options);
+
+/// Resolve out-of-place tensor OpOperands in `op` (and the ops nested in it)
+/// with explicit tensor copies, based on the analysis results that are already
+/// available in `state`. AllocTensorOps that do not have an `escape` attribute
+/// yet are annotated with one.
+///
+/// Copies of unranked tensors are not supported and are reported as an error.
+LogicalResult insertTensorCopies(Operation *op, const AnalysisState &state);
+} // namespace bufferization
+} // namespace mlir
+
+#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_TENSORCOPYINSERTION_H
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_mlir_dialect_library(MLIRBufferizationTransforms
   FuncBufferizableOpInterfaceImpl.cpp
   OneShotAnalysis.cpp
   OneShotModuleBufferize.cpp
+  TensorCopyInsertion.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
@@ -0,0 +1,128 @@
+//===- TensorCopyInsertion.cpp - Resolve Bufferization Conflicts w/ Copies ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Bufferization/Transforms/TensorCopyInsertion.h"
+
+#include "PassDetail.h"
+
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
+
+using namespace mlir;
+using namespace mlir::bufferization;
+
+/// Run One-Shot Analysis on `op` and materialize the out-of-place decisions
+/// as explicit tensor copies.
+///
+/// The module analysis is used when `options.bufferizeFunctionBoundaries` is
+/// set; in that case `op` must be a ModuleOp, otherwise an error is emitted.
+/// When `options.testAnalysisOnly` is set, the IR is not modified beyond what
+/// the analysis itself does. Returns failure if the analysis or the copy
+/// insertion fails.
+LogicalResult mlir::bufferization::insertTensorCopies(
+    Operation *op, const OneShotBufferizationOptions &options) {
+  OneShotAnalysisState state(op, options);
+  // Run normal One-Shot Bufferize analysis or One-Shot Module Bufferize
+  // analysis depending on whether function boundary bufferization is enabled or
+  // not.
+  if (options.bufferizeFunctionBoundaries) {
+    // The pass can be scheduled on any op, but the module analysis needs a
+    // ModuleOp. Emit a diagnostic instead of asserting inside `cast`.
+    auto moduleOp = dyn_cast<ModuleOp>(op);
+    if (!moduleOp)
+      return op->emitError(
+          "bufferizing function boundaries requires a module op");
+    if (failed(analyzeModuleOp(moduleOp, state)))
+      return failure();
+  } else {
+    if (failed(analyzeOp(op, state)))
+      return failure();
+  }
+
+  // In analysis-only mode, the copies must not be inserted.
+  if (options.testAnalysisOnly)
+    return success();
+
+  return insertTensorCopies(op, state);
+}
+
+/// Walk `op` and rewrite the IR according to the analysis results in `state`:
+///  * AllocTensorOps without an `escape` attribute get one, depending on
+///    whether their result is yielded.
+///  * Every tensor OpOperand of a bufferizable op that is not in-place is
+///    replaced with a copy in the form of an AllocTensorOp that is inserted
+///    right before the op.
+///
+/// Returns failure if a copy of an unranked tensor would be required.
+LogicalResult
+mlir::bufferization::insertTensorCopies(Operation *op,
+                                        const AnalysisState &state) {
+  OpBuilder builder(op->getContext());
+  WalkResult result = op->walk([&](Operation *nestedOp) {
+    // Ops that are not bufferizable (or filtered out by the options) are left
+    // untouched.
+    if (!state.getOptions().dynCastBufferizableOp(nestedOp))
+      return WalkResult::skip();
+
+    // Find AllocTensorOps without an `escape` attribute and add the attribute
+    // based on analysis results.
+    if (auto allocTensorOp = dyn_cast<AllocTensorOp>(nestedOp)) {
+      if (allocTensorOp.escape())
+        return WalkResult::advance();
+      bool escape = state.isTensorYielded(allocTensorOp.result());
+      allocTensorOp.escapeAttr(builder.getBoolAttr(escape));
+      return WalkResult::advance();
+    }
+
+    // Find out-of-place tensor OpOperands and resolve them with an explicit
+    // tensor copy in the form of an AllocTensorOp.
+    builder.setInsertionPoint(nestedOp);
+    for (OpOperand &opOperand : nestedOp->getOpOperands()) {
+      Type operandType = opOperand.get().getType();
+      if (!operandType.isa<TensorType>())
+        continue;
+      if (state.isInPlace(opOperand))
+        continue;
+      // Only operands that actually need a copy can fail: an unranked tensor
+      // that bufferizes in-place is fine and must not be rejected.
+      auto tensorType = operandType.dyn_cast<RankedTensorType>();
+      if (!tensorType) {
+        nestedOp->emitError("copies of unranked tensors are not supported");
+        return WalkResult::interrupt();
+      }
+      // The copy escapes if any OpResult that aliases with the operand is
+      // yielded.
+      SmallVector<OpResult> aliasingOpResults =
+          state.getAliasingOpResult(opOperand);
+      bool escape = llvm::any_of(
+          aliasingOpResults, [&](Value v) { return state.isTensorYielded(v); });
+      Value copy = builder.create<AllocTensorOp>(
+          nestedOp->getLoc(), tensorType, ValueRange(), opOperand.get(),
+          escape);
+      opOperand.set(copy);
+    }
+
+    return WalkResult::advance();
+  });
+
+  return failure(result.wasInterrupted());
+}
+
+namespace {
+/// Pass that runs One-Shot Analysis on the op it is scheduled on and inserts
+/// tensor copies for all out-of-place OpOperands. The analysis is configured
+/// either by options given at construction time or, if there are none, by the
+/// pass options.
+struct TensorCopyInsertionPass
+    : TensorCopyInsertionBase<TensorCopyInsertionPass> {
+  /// Create a pass that is configured through its pass options.
+  TensorCopyInsertionPass()
+      : TensorCopyInsertionBase<TensorCopyInsertionPass>(),
+        options(llvm::None) {}
+  /// Create a pass that uses the given `options` and ignores the pass options.
+  explicit TensorCopyInsertionPass(const OneShotBufferizationOptions &options)
+      : TensorCopyInsertionBase<TensorCopyInsertionPass>(), options(options) {}
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    // The pass creates bufferization.alloc_tensor ops.
+    registry.insert<bufferization::BufferizationDialect>();
+  }
+
+  void runOnOperation() override {
+    // Options that were passed to the constructor take precedence.
+    if (options.hasValue()) {
+      if (failed(insertTensorCopies(getOperation(), *options)))
+        signalPassFailure();
+      return;
+    }
+
+    // Otherwise, build the options from the pass options. All other fields
+    // keep their default values.
+    OneShotBufferizationOptions passOptions;
+    passOptions.allowReturnAllocs = allowReturnAllocs;
+    passOptions.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
+    if (failed(insertTensorCopies(getOperation(), passOptions)))
+      signalPassFailure();
+  }
+
+private:
+  /// Options given at construction time; empty if the pass is configured
+  /// through its pass options.
+  Optional<OneShotBufferizationOptions> options;
+};
+} // namespace
+
+/// Create a TensorCopyInsertionPass without explicit options; the pass then
+/// builds its analysis options from its pass options when it runs.
+std::unique_ptr<Pass> mlir::bufferization::createTensorCopyInsertionPass() {
+  return std::make_unique<TensorCopyInsertionPass>();
+}
+
+/// Create a TensorCopyInsertionPass that runs the analysis with the given
+/// `options` (a copy is stored in the pass).
+std::unique_ptr<Pass> mlir::bufferization::createTensorCopyInsertionPass(
+    const OneShotBufferizationOptions &options) {
+  return std::make_unique<TensorCopyInsertionPass>(options);
+}
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -0,0 +1,27 @@
+// RUN: mlir-opt %s -tensor-copy-insertion -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -tensor-copy-insertion="bufferize-function-boundaries allow-return-allocs" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
+
+// %t is the destination of tensor.insert and is also returned unmodified, so
+// the insert has to operate on a copy of %t. The copy is expected to be marked
+// as escaping only in the run that bufferizes function boundaries.
+// CHECK-LABEL: func @read_after_write_conflict(
+//  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+// CHECK-FUNC-LABEL: func @read_after_write_conflict(
+func.func @read_after_write_conflict(%t: tensor<?xf32>, %idx: index, %f: f32)
+  -> (tensor<?xf32>, tensor<?xf32>)
+{
+  // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[t]]) {escape = false} : tensor<?xf32>
+  // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) {escape = true} : tensor<?xf32>
+  // CHECK: %[[insert:.*]] = tensor.insert %{{.*}} into %[[copy]]
+  %0 = tensor.insert %f into %t[%idx] : tensor<?xf32>
+  // CHECK: return %[[insert]], %[[t]]
+  return %0, %t : tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// An alloc_tensor without an `escape` attribute must get one. The returned
+// allocation is expected to be marked as escaping only in the run that
+// bufferizes function boundaries.
+// CHECK-LABEL: func @return_alloc_tensor
+// CHECK-FUNC-LABEL: func @return_alloc_tensor
+func.func @return_alloc_tensor() -> (tensor<5xf32>) {
+  // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
+  // CHECK-FUNC: bufferization.alloc_tensor() {escape = true} : tensor<5xf32>
+  %0 = bufferization.alloc_tensor() : tensor<5xf32>
+  return %0 : tensor<5xf32>
+}