
Commit c66303c

[mlir][sparse] Switch to One-Shot Bufferize
This change removes the partial bufferization passes from the sparse compilation pipeline and replaces them with One-Shot Bufferize. One-Shot Analysis (and TensorCopyInsertion) is used to resolve all out-of-place bufferizations, dense and sparse. Dense ops are then bufferized with BufferizableOpInterface. Sparse ops are still bufferized in the Sparsification pass.

Details:
* Dense allocations are automatically deallocated, unless they are yielded from a block. (In that case the alloc would leak.) All test cases are modified accordingly. E.g., some funcs now have an "out" tensor argument that is returned from the function. (That way, the allocation happens at the call site.)
* Sparse allocations are *not* automatically deallocated. They must be "released" manually. (No change, this will be addressed in a future change.)
* Sparse tensor copies are not supported yet. (Future change)
* Sparsification no longer has to consider inplacability. If necessary, allocations and/or copies are inserted during TensorCopyInsertion. All tensors are inplaceable by the time Sparsification is running. Instead of marking a tensor as "not inplaceable", it can be marked as "not writable", which will trigger an allocation and/or copy during TensorCopyInsertion.

Differential Revision: https://reviews.llvm.org/D129356
1 parent e5c4cde commit c66303c
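
As a minimal illustration of the last point in the message above (this snippet is not part of the commit's diff, and the attribute spelling "bufferization.writable" is an assumption, not taken from this change): a tensor function argument can be marked as not writable, which makes TensorCopyInsertion materialize an allocation/copy instead of an in-place update.

  #include "mlir/Dialect/Func/IR/FuncOps.h"
  #include "mlir/IR/BuiltinAttributes.h"

  // Hypothetical sketch: mark the first tensor argument of `funcOp` as not
  // writable so that One-Shot Analysis never updates it in place.
  void markFirstArgNotWritable(mlir::func::FuncOp funcOp) {
    funcOp.setArgAttr(/*index=*/0, "bufferization.writable",
                      mlir::BoolAttr::get(funcOp.getContext(), false));
  }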


58 files changed: +544 -620 lines changed

mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h

Lines changed: 4 additions & 0 deletions
@@ -471,6 +471,10 @@ allocateTensorForShapedValue(OpBuilder &b, Location loc, Value shapedValue,
                              bool escape, const BufferizationOptions &options,
                              bool copy = true);

+/// Return `true` if the allocation of the given op is guaranteed to not escape
+/// the containing block.
+bool allocationDoesNotEscape(OpResult opResult);
+
 /// Lookup the buffer for the given value. If the value was not bufferized
 /// yet, wrap it in a ToMemrefOp. Otherwise, it is the result of a ToTensorOp,
 /// from which the memref operand is returned.

mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h

Lines changed: 3 additions & 0 deletions
@@ -44,6 +44,9 @@ struct SparseCompilerOptions
   PassOptions::Option<bool> enableVLAVectorization{
       *this, "enable-vla-vectorization",
       desc("Enable vector length agnostic vectorization"), init(false)};
+  PassOptions::Option<bool> testBufferizationAnalysisOnly{
+      *this, "test-bufferization-analysis-only",
+      desc("Run only the inplacability analysis"), init(false)};

   /// Projects out the options for `createSparsificationPass`.
   SparsificationOptions sparsificationOptions() const {
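
A hedged usage sketch (assembled from the APIs touched by this commit, not code that is part of the diff): driving the new analysis-only mode from C++ rather than from the pass-pipeline command line.

  #include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"
  #include "mlir/IR/BuiltinOps.h"
  #include "mlir/Pass/PassManager.h"
  #include "mlir/Support/LogicalResult.h"

  // Build the sparse compiler pipeline with the new flag set; the pipeline
  // then stops right after TensorCopyInsertion / One-Shot Analysis (see
  // SparseTensorPipelines.cpp below) and only reports/annotates inplacability.
  mlir::LogicalResult runBufferizationAnalysisOnly(mlir::ModuleOp module) {
    mlir::sparse_tensor::SparseCompilerOptions options;
    options.testBufferizationAnalysisOnly = true;
    mlir::PassManager pm(module.getContext());
    mlir::sparse_tensor::buildSparseCompiler(pm, options);
    return pm.run(module);
  }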

mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h

Lines changed: 5 additions & 0 deletions
@@ -23,6 +23,9 @@
 #include "mlir/Pass/Pass.h"

 namespace mlir {
+namespace bufferization {
+struct OneShotBufferizationOptions;
+} // namespace bufferization

 // Forward.
 class TypeConverter;
@@ -131,6 +134,8 @@ void populateSparseTensorConversionPatterns(
     const SparseTensorConversionOptions &options =
         SparseTensorConversionOptions());

+std::unique_ptr<Pass> createDenseBufferizationPass(
+    const bufferization::OneShotBufferizationOptions &options);
 std::unique_ptr<Pass> createSparseTensorConversionPass();
 std::unique_ptr<Pass>
 createSparseTensorConversionPass(const SparseTensorConversionOptions &options);

mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp

Lines changed: 16 additions & 0 deletions
@@ -50,6 +50,22 @@ static Operation *getOwnerOfValue(Value value) {
   return value.cast<BlockArgument>().getOwner()->getParentOp();
 }

+bool bufferization::allocationDoesNotEscape(OpResult opResult) {
+#ifndef NDEBUG
+  auto bufferizableOp = opResult.getDefiningOp<BufferizableOpInterface>();
+  assert(bufferizableOp && bufferizableOp.bufferizesToAllocation(opResult) &&
+         "expected op that bufferizes to an allocation");
+#endif // NDEBUG
+
+  Operation *op = opResult.getDefiningOp();
+  // If there is no 'escape' attribute, we cannot say for sure.
+  if (!op->hasAttr(BufferizationDialect::kEscapeAttrName))
+    return false;
+  auto attr =
+      op->getAttrOfType<ArrayAttr>(BufferizationDialect::kEscapeAttrName);
+  return !attr[opResult.getResultNumber()].cast<BoolAttr>().getValue();
+}
+
 /// Create an AllocTensorOp for the given shaped value. If `copy` is set, the
 /// shaped value is copied. Otherwise, a tensor with undefined contents is
 /// allocated.

mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp

Lines changed: 41 additions & 12 deletions
@@ -9,20 +9,41 @@
 #include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"

 #include "mlir/Conversion/Passes.h"
-#include "mlir/Dialect/Arithmetic/Transforms/Passes.h"
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/Func/Transforms/Passes.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
 #include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
-#include "mlir/Dialect/Tensor/Transforms/Passes.h"
-#include "mlir/Dialect/Vector/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"

 using namespace mlir;
 using namespace mlir::sparse_tensor;

+/// Return configuration options for One-Shot Bufferize.
+static bufferization::OneShotBufferizationOptions
+getBufferizationOptions(bool analysisOnly) {
+  using namespace bufferization;
+  OneShotBufferizationOptions options;
+  options.bufferizeFunctionBoundaries = true;
+  // TODO(springerm): To spot memory leaks more easily, returning dense allocs
+  // should be disallowed.
+  options.allowReturnAllocs = true;
+  options.functionBoundaryTypeConversion =
+      BufferizationOptions::LayoutMapOption::IdentityLayoutMap;
+  options.unknownTypeConverterFn = [](Value value, unsigned memorySpace,
+                                      const BufferizationOptions &options) {
+    return getMemRefTypeWithStaticIdentityLayout(
+        value.getType().cast<TensorType>(), memorySpace);
+  };
+  if (analysisOnly) {
+    options.testAnalysisOnly = true;
+    options.printConflicts = true;
+  }
+  return options;
+}
+
 //===----------------------------------------------------------------------===//
 // Pipeline implementation.
 //===----------------------------------------------------------------------===//
@@ -31,20 +52,28 @@ void mlir::sparse_tensor::buildSparseCompiler(
     OpPassManager &pm, const SparseCompilerOptions &options) {
   // TODO(wrengr): ensure the original `pm` is for ModuleOp
   pm.addNestedPass<func::FuncOp>(createLinalgGeneralizationPass());
-  pm.addPass(createLinalgElementwiseOpFusionPass());
+  // TODO(springerm): Reactivate element-wise op fusion pass. This pass does not
+  // fit well with bufferization because it replaces unused "out" operands of
+  // LinalgOps with InitTensorOps. This would result in additional buffer
+  // allocations during bufferization.
+  // pm.addPass(createLinalgElementwiseOpFusionPass());
+  pm.addPass(
+      bufferization::createTensorCopyInsertionPass(getBufferizationOptions(
+          /*analysisOnly=*/options.testBufferizationAnalysisOnly)));
+  if (options.testBufferizationAnalysisOnly)
+    return;
   pm.addPass(createSparsificationPass(options.sparsificationOptions()));
   pm.addPass(createSparseTensorConversionPass(
       options.sparseTensorConversionOptions()));
-  pm.addNestedPass<func::FuncOp>(createLinalgBufferizePass());
-  pm.addNestedPass<func::FuncOp>(vector::createVectorBufferizePass());
+  pm.addPass(createDenseBufferizationPass(
+      getBufferizationOptions(/*analysisOnly=*/false)));
+  pm.addNestedPass<func::FuncOp>(
+      mlir::bufferization::createFinalizingBufferizePass());
+  // TODO(springerm): Add sparse support to the BufferDeallocation pass and add
+  // it to this pipeline.
   pm.addNestedPass<func::FuncOp>(createConvertLinalgToLoopsPass());
   pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
   pm.addNestedPass<func::FuncOp>(createConvertSCFToCFPass());
-  pm.addPass(func::createFuncBufferizePass());
-  pm.addPass(arith::createConstantBufferizePass());
-  pm.addNestedPass<func::FuncOp>(createTensorBufferizePass());
-  pm.addNestedPass<func::FuncOp>(
-      mlir::bufferization::createFinalizingBufferizePass());
   pm.addPass(createLowerAffinePass());
   pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
   pm.addPass(createMemRefToLLVMPass());

mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 add_mlir_dialect_library(MLIRSparseTensorTransforms
   BufferizableOpInterfaceImpl.cpp
   CodegenUtils.cpp
+  DenseBufferizationPass.cpp
   Sparsification.cpp
   SparseTensorConversion.cpp
   SparseTensorPasses.cpp
mlir/lib/Dialect/SparseTensor/Transforms/DenseBufferizationPass.cpp

Lines changed: 74 additions & 0 deletions (new file)
@@ -0,0 +1,74 @@
+//===- DenseBufferizationPass.cpp - Dense bufferization pass --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
+
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
+
+using namespace mlir;
+using namespace mlir::func;
+
+namespace mlir {
+namespace sparse_tensor {
+
+/// Return `true` if one of the given types is a sparse tensor type.
+static bool containsSparseTensor(TypeRange types) {
+  for (Type t : types)
+    if (getSparseTensorEncoding(t))
+      return true;
+  return false;
+}
+
+/// A pass that bufferizes only dense tensor ops and ignores all sparse tensor
+/// ops. No buffer copies are inserted. All tensor OpOperands must be
+/// inplacable.
+class BufferizeDenseOpsPass
+    : public PassWrapper<BufferizeDenseOpsPass, OperationPass<ModuleOp>> {
+public:
+  BufferizeDenseOpsPass(
+      const bufferization::OneShotBufferizationOptions &options)
+      : PassWrapper<BufferizeDenseOpsPass, OperationPass<ModuleOp>>(),
+        options(options) {}
+
+  void runOnOperation() override {
+    // Disallow all sparse tensor ops, so that only dense tensor ops are
+    // bufferized.
+    bufferization::OpFilter opFilter;
+    opFilter.allowOperation([&](Operation *op) {
+      if (containsSparseTensor(TypeRange(op->getResults())) ||
+          containsSparseTensor(TypeRange(op->getOperands())))
+        return false;
+      if (auto funcOp = dyn_cast<func::FuncOp>(op)) {
+        FunctionType funcType = funcOp.getFunctionType();
+        if (containsSparseTensor(funcType.getInputs()) ||
+            containsSparseTensor(funcType.getResults()))
+          return false;
+      }
+      return true;
+    });
+
+    if (failed(bufferization::bufferizeOp(getOperation(), options,
+                                          /*copyBeforeWrite=*/false,
+                                          &opFilter)))
+      signalPassFailure();
+  }
+
+private:
+  bufferization::OneShotBufferizationOptions options;
+};
+} // namespace sparse_tensor
+} // namespace mlir
+
+std::unique_ptr<Pass> mlir::createDenseBufferizationPass(
+    const bufferization::OneShotBufferizationOptions &options) {
+  return std::make_unique<mlir::sparse_tensor::BufferizeDenseOpsPass>(options);
+}
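
An illustrative sketch (not part of the commit): the new pass can also be added to a custom pipeline outside the sparse compiler. The helper name buildCustomPipeline is hypothetical; the option values mirror the ones set in SparseTensorPipelines.cpp.

  #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
  #include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
  #include "mlir/Pass/PassManager.h"

  static void buildCustomPipeline(mlir::OpPassManager &pm) {
    mlir::bufferization::OneShotBufferizationOptions options;
    options.bufferizeFunctionBoundaries = true;
    options.allowReturnAllocs = true;
    // Bufferizes dense tensor ops only; ops touching sparse tensors are
    // filtered out by the pass and left for SparseTensorConversion.
    pm.addPass(mlir::createDenseBufferizationPass(options));
  }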

mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp

Lines changed: 21 additions & 2 deletions
@@ -16,6 +16,7 @@

 #include "CodegenUtils.h"

+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -30,6 +31,8 @@

 using namespace mlir;
 using namespace mlir::sparse_tensor;
+using mlir::bufferization::BufferizableOpInterface;
+using mlir::bufferization::BufferizationDialect;

 namespace {

@@ -320,8 +323,8 @@ static Value genIndexAndValueForSparse(OpBuilder &builder, Location loc,
   return builder.create<tensor::ExtractOp>(loc, values, ivs[0]);
 }

-/// Generates code to allocate a tensor of the given type, and zero
-/// initialize it. If the tensor type has any dynamic sizes, then the
+/// Generates code to allocate a buffer of the given type, and zero
+/// initialize it. If the buffer type has any dynamic sizes, then the
 /// `sizes` parameter should be as filled by sizesFromPtr(); that way
 /// we can reuse the genDimSizeCall() results generated by sizesFromPtr().
 static Value allocDenseTensor(OpBuilder &builder, Location loc,
@@ -340,6 +343,11 @@ static Value allocDenseTensor(OpBuilder &builder, Location loc,
   return mem;
 }

+/// Generates code to deallocate a dense buffer.
+static void deallocDenseTensor(OpBuilder &builder, Location loc, Value buffer) {
+  builder.create<memref::DeallocOp>(loc, buffer);
+}
+
 /// Inserts the element returned by genGetNextCall(_, ind, elemPtr) into
 /// the tensor created by allocDenseTensor(). The `rank` is the rank
 /// of the `tensor` and the length of `ind`.
@@ -618,6 +626,9 @@ class SparseTensorAllocConverter
   LogicalResult
   matchAndRewrite(bufferization::AllocTensorOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
+    if (op.getCopy())
+      return rewriter.notifyMatchFailure(op,
+                                         "sparse tensor copy not implemented");
     RankedTensorType resType = op.getType();
     auto enc = getSparseTensorEncoding(resType);
     if (!enc)
@@ -743,6 +754,9 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
       Value iter = genNewCall(rewriter, op, params);
       Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
       Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
+      Block *insertionBlock = rewriter.getInsertionBlock();
+      // TODO: Dense buffers should be allocated/deallocated via the callback
+      // in BufferizationOptions.
      Value dst = allocDenseTensor(rewriter, loc, dstTensorTp, sizes);
       SmallVector<Value> noArgs;
       SmallVector<Type> noTypes;
@@ -758,6 +772,11 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
       rewriter.setInsertionPointAfter(whileOp);
       genDelCOOCall(rewriter, op, elemTp, iter);
       rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(op, resType, dst);
+      // Deallocate the buffer.
+      if (bufferization::allocationDoesNotEscape(op->getOpResult(0))) {
+        rewriter.setInsertionPoint(insertionBlock->getTerminator());
+        deallocDenseTensor(rewriter, loc, dst);
+      }
       return success();
     }
     if (!encDst && !encSrc) {

mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp

Lines changed: 7 additions & 1 deletion
@@ -127,11 +127,17 @@ struct SparseTensorConversionPass
     });
     // The following operations and dialects may be introduced by the
     // rewriting rules, and are therefore marked as legal.
-    target.addLegalOp<complex::ConstantOp, complex::NotEqualOp, linalg::FillOp,
+    target.addLegalOp<bufferization::ToMemrefOp, bufferization::ToTensorOp,
+                      complex::ConstantOp, complex::NotEqualOp, linalg::FillOp,
                       linalg::YieldOp, tensor::ExtractOp>();
     target.addLegalDialect<
         arith::ArithmeticDialect, bufferization::BufferizationDialect,
         LLVM::LLVMDialect, memref::MemRefDialect, scf::SCFDialect>();
+    target.addDynamicallyLegalOp<bufferization::AllocTensorOp>(
+        [&](bufferization::AllocTensorOp op) {
+          // Dense tensors are legal, sparse tensors are not.
+          return !static_cast<bool>(op.getType().getEncoding());
+        });
     // Translate strategy flags to strategy options.
     SparseTensorConversionOptions options(
         sparseToSparseConversionStrategy(sparseToSparse));

mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp

Lines changed: 12 additions & 42 deletions
@@ -308,17 +308,6 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
   return true;
 }

-/// Returns true if tensor has an in-place annotation.
-static bool isInPlace(Value val) {
-  if (auto arg = val.dyn_cast<BlockArgument>())
-    if (auto funcOp = dyn_cast<func::FuncOp>(arg.getOwner()->getParentOp()))
-      if (auto attr = funcOp.getArgAttrOfType<BoolAttr>(
-              arg.getArgNumber(),
-              bufferization::BufferizableOpInterface::kInplaceableAttrName))
-        return attr.getValue();
-  return false;
-}
-
 /// Returns true if tensor materializes uninitialized into the computation.
 static bool isMaterializing(Value val) {
   return val.getDefiningOp<linalg::InitTensorOp>() ||
@@ -355,9 +344,8 @@ static bool isAdmissableTensorExp(Merger &merger, linalg::GenericOp op,
     return true;
   // A tensor expression with a sparse output tensor that changes its values
   // but not its nonzero structure, an operation called "simply dynamic" in
-  // [Bik96,Ch9], is also admissable without special codegen, provided
-  // the tensor's underlying sparse storage scheme can be modified in-place.
-  if (merger.isSingleCondition(tensor, exp) && isInPlace(lhs->get()))
+  // [Bik96,Ch9], is also admissable without special codegen.
+  if (merger.isSingleCondition(tensor, exp))
     return true;
   // Accept "truly dynamic" if the output tensor materializes uninitialized
   // into the computation and insertions occur in lexicographic index order.
@@ -486,37 +474,19 @@ static Value genOutputBuffer(CodeGen &codegen, OpBuilder &builder,
   OpOperand *lhs = op.getOutputOperand(0);
   Value tensor = lhs->get();
   bool isInit = op.isInitTensor(lhs);
-  // An output tensor that is in-place can simply materialize from the buffer
-  // of the tensor that appears in the outs() clause. For updates, this has
-  // the advantage that only the nonzero value are involved in the computation,
-  // keeping the operation O(nnz). In all other cases, we are forced to zero
-  // out the buffer to enforce the assumption above, which may negatively
-  // impact running complexity (viz. O(n^2 + nnz) vs. O(nnz) for matrices).
+  // An output tensor can simply materialize from the buffer of the tensor that
+  // appears in the outs() clause. For updates, this has the advantage that only
+  // the nonzero value are involved in the computation, keeping the operation
+  // O(nnz). In all other cases, we are forced to zero out the buffer to enforce
+  // the assumption above, which may negatively impact running complexity
+  // (viz. O(n^2 + nnz) vs. O(nnz) for matrices).
   // TODO: use better analysis to avoid zeroing out the buffer?
-  if (isInPlace(tensor)) {
-    Value init =
-        builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
-    if (!isInit) {
-      Value zero = constantZero(builder, loc, denseTp.getElementType());
-      builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{init});
-    }
-    return init;
-  }
-  // By default, a new buffer is allocated which is either set to zero (when
-  // no updates occur or the tensor materializes into this computation) or
-  // initialized to the value of the tensor defined in the outs() clause.
-  // This is always correct (since it enforces all assumptions above) but
-  // may negatively impact running complexity as explained above.
-  Value alloc = builder.create<memref::AllocOp>(loc, denseTp, args);
-  if (!isInit || isMaterializing(tensor)) {
+  Value init = builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
+  if (!isInit) {
     Value zero = constantZero(builder, loc, denseTp.getElementType());
-    builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{alloc});
-  } else {
-    Value init =
-        builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
-    builder.create<memref::CopyOp>(loc, init, alloc);
+    builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{init});
   }
-  return alloc;
+  return init;
 }

 /// Local bufferization of all dense and sparse data structures.
