
Commit c66303c

[mlir][sparse] Switch to One-Shot Bufferize
This change removes the partial bufferization passes from the sparse compilation pipeline and replaces them with One-Shot Bufferize. One-Shot Analysis (and TensorCopyInsertion) is used to resolve all out-of-place bufferizations, dense and sparse. Dense ops are then bufferized with BufferizableOpInterface. Sparse ops are still bufferized in the Sparsification pass.

Details:
* Dense allocations are automatically deallocated, unless they are yielded from a block. (In that case the alloc would leak.) All test cases are modified accordingly. E.g., some funcs now have an "out" tensor argument that is returned from the function. (That way, the allocation happens at the call site.)
* Sparse allocations are *not* automatically deallocated. They must be "released" manually. (No change, this will be addressed in a future change.)
* Sparse tensor copies are not supported yet. (Future change)
* Sparsification no longer has to consider inplacability. If necessary, allocations and/or copies are inserted during TensorCopyInsertion. All tensors are inplaceable by the time Sparsification is running. Instead of marking a tensor as "not inplaceable", it can be marked as "not writable", which will trigger an allocation and/or copy during TensorCopyInsertion.

Differential Revision: https://reviews.llvm.org/D129356
1 parent e5c4cde commit c66303c
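
As a minimal illustration of the last point in the message above (this snippet is not part of the commit's diff, and the attribute spelling "bufferization.writable" is an assumption, not taken from this change): a tensor function argument can be marked as not writable, which makes TensorCopyInsertion materialize an allocation/copy instead of an in-place update.

  #include "mlir/Dialect/Func/IR/FuncOps.h"
  #include "mlir/IR/BuiltinAttributes.h"

  // Hypothetical sketch: mark the first tensor argument of `funcOp` as not
  // writable so that One-Shot Analysis never updates it in place.
  void markFirstArgNotWritable(mlir::func::FuncOp funcOp) {
    funcOp.setArgAttr(/*index=*/0, "bufferization.writable",
                      mlir::BoolAttr::get(funcOp.getContext(), false));
  }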


58 files changed: +544 -620 lines changed

mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h

Lines changed: 4 additions & 0 deletions
@@ -471,6 +471,10 @@ allocateTensorForShapedValue(OpBuilder &b, Location loc, Value shapedValue,
                              bool escape, const BufferizationOptions &options,
                              bool copy = true);

+/// Return `true` if the allocation of the given op is guaranteed to not escape
+/// the containing block.
+bool allocationDoesNotEscape(OpResult opResult);
+
 /// Lookup the buffer for the given value. If the value was not bufferized
 /// yet, wrap it in a ToMemrefOp. Otherwise, it is the result of a ToTensorOp,
 /// from which the memref operand is returned.

mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h

Lines changed: 3 additions & 0 deletions
@@ -44,6 +44,9 @@ struct SparseCompilerOptions
   PassOptions::Option<bool> enableVLAVectorization{
       *this, "enable-vla-vectorization",
       desc("Enable vector length agnostic vectorization"), init(false)};
+  PassOptions::Option<bool> testBufferizationAnalysisOnly{
+      *this, "test-bufferization-analysis-only",
+      desc("Run only the inplacability analysis"), init(false)};

   /// Projects out the options for `createSparsificationPass`.
   SparsificationOptions sparsificationOptions() const {
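
A hedged usage sketch (assembled from the APIs touched by this commit, not code that is part of the diff): driving the new analysis-only mode from C++ rather than from the pass-pipeline command line.

  #include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"
  #include "mlir/IR/BuiltinOps.h"
  #include "mlir/Pass/PassManager.h"
  #include "mlir/Support/LogicalResult.h"

  // Build the sparse compiler pipeline with the new flag set; the pipeline
  // then stops right after TensorCopyInsertion / One-Shot Analysis (see
  // SparseTensorPipelines.cpp below) and only reports/annotates inplacability.
  mlir::LogicalResult runBufferizationAnalysisOnly(mlir::ModuleOp module) {
    mlir::sparse_tensor::SparseCompilerOptions options;
    options.testBufferizationAnalysisOnly = true;
    mlir::PassManager pm(module.getContext());
    mlir::sparse_tensor::buildSparseCompiler(pm, options);
    return pm.run(module);
  }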

mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h

Lines changed: 5 additions & 0 deletions
@@ -23,6 +23,9 @@
 #include "mlir/Pass/Pass.h"

 namespace mlir {
+namespace bufferization {
+struct OneShotBufferizationOptions;
+} // namespace bufferization

 // Forward.
 class TypeConverter;
@@ -131,6 +134,8 @@ void populateSparseTensorConversionPatterns(
     const SparseTensorConversionOptions &options =
         SparseTensorConversionOptions());

+std::unique_ptr<Pass> createDenseBufferizationPass(
+    const bufferization::OneShotBufferizationOptions &options);
 std::unique_ptr<Pass> createSparseTensorConversionPass();
 std::unique_ptr<Pass>
 createSparseTensorConversionPass(const SparseTensorConversionOptions &options);

mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp

Lines changed: 16 additions & 0 deletions
@@ -50,6 +50,22 @@ static Operation *getOwnerOfValue(Value value) {
   return value.cast<BlockArgument>().getOwner()->getParentOp();
 }

+bool bufferization::allocationDoesNotEscape(OpResult opResult) {
+#ifndef NDEBUG
+  auto bufferizableOp = opResult.getDefiningOp<BufferizableOpInterface>();
+  assert(bufferizableOp && bufferizableOp.bufferizesToAllocation(opResult) &&
+         "expected op that bufferizes to an allocation");
+#endif // NDEBUG
+
+  Operation *op = opResult.getDefiningOp();
+  // If there is no 'escape' attribute, we cannot say for sure.
+  if (!op->hasAttr(BufferizationDialect::kEscapeAttrName))
+    return false;
+  auto attr =
+      op->getAttrOfType<ArrayAttr>(BufferizationDialect::kEscapeAttrName);
+  return !attr[opResult.getResultNumber()].cast<BoolAttr>().getValue();
+}
+
 /// Create an AllocTensorOp for the given shaped value. If `copy` is set, the
 /// shaped value is copied. Otherwise, a tensor with undefined contents is
 /// allocated.

mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp

Lines changed: 41 additions & 12 deletions
@@ -9,20 +9,41 @@
 #include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"

 #include "mlir/Conversion/Passes.h"
-#include "mlir/Dialect/Arithmetic/Transforms/Passes.h"
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/Func/Transforms/Passes.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
 #include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
-#include "mlir/Dialect/Tensor/Transforms/Passes.h"
-#include "mlir/Dialect/Vector/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"

 using namespace mlir;
 using namespace mlir::sparse_tensor;

+/// Return configuration options for One-Shot Bufferize.
+static bufferization::OneShotBufferizationOptions
+getBufferizationOptions(bool analysisOnly) {
+  using namespace bufferization;
+  OneShotBufferizationOptions options;
+  options.bufferizeFunctionBoundaries = true;
+  // TODO(springerm): To spot memory leaks more easily, returning dense allocs
+  // should be disallowed.
+  options.allowReturnAllocs = true;
+  options.functionBoundaryTypeConversion =
+      BufferizationOptions::LayoutMapOption::IdentityLayoutMap;
+  options.unknownTypeConverterFn = [](Value value, unsigned memorySpace,
+                                      const BufferizationOptions &options) {
+    return getMemRefTypeWithStaticIdentityLayout(
+        value.getType().cast<TensorType>(), memorySpace);
+  };
+  if (analysisOnly) {
+    options.testAnalysisOnly = true;
+    options.printConflicts = true;
+  }
+  return options;
+}
+
 //===----------------------------------------------------------------------===//
 // Pipeline implementation.
 //===----------------------------------------------------------------------===//
@@ -31,20 +52,28 @@ void mlir::sparse_tensor::buildSparseCompiler(
     OpPassManager &pm, const SparseCompilerOptions &options) {
   // TODO(wrengr): ensure the original `pm` is for ModuleOp
   pm.addNestedPass<func::FuncOp>(createLinalgGeneralizationPass());
-  pm.addPass(createLinalgElementwiseOpFusionPass());
+  // TODO(springerm): Reactivate element-wise op fusion pass. This pass does not
+  // fit well with bufferization because it replaces unused "out" operands of
+  // LinalgOps with InitTensorOps. This would result in additional buffer
+  // allocations during bufferization.
+  // pm.addPass(createLinalgElementwiseOpFusionPass());
+  pm.addPass(
+      bufferization::createTensorCopyInsertionPass(getBufferizationOptions(
+          /*analysisOnly=*/options.testBufferizationAnalysisOnly)));
+  if (options.testBufferizationAnalysisOnly)
+    return;
   pm.addPass(createSparsificationPass(options.sparsificationOptions()));
   pm.addPass(createSparseTensorConversionPass(
       options.sparseTensorConversionOptions()));
-  pm.addNestedPass<func::FuncOp>(createLinalgBufferizePass());
-  pm.addNestedPass<func::FuncOp>(vector::createVectorBufferizePass());
+  pm.addPass(createDenseBufferizationPass(
+      getBufferizationOptions(/*analysisOnly=*/false)));
+  pm.addNestedPass<func::FuncOp>(
+      mlir::bufferization::createFinalizingBufferizePass());
+  // TODO(springerm): Add sparse support to the BufferDeallocation pass and add
+  // it to this pipeline.
   pm.addNestedPass<func::FuncOp>(createConvertLinalgToLoopsPass());
   pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
   pm.addNestedPass<func::FuncOp>(createConvertSCFToCFPass());
-  pm.addPass(func::createFuncBufferizePass());
-  pm.addPass(arith::createConstantBufferizePass());
-  pm.addNestedPass<func::FuncOp>(createTensorBufferizePass());
-  pm.addNestedPass<func::FuncOp>(
-      mlir::bufferization::createFinalizingBufferizePass());
   pm.addPass(createLowerAffinePass());
   pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
   pm.addPass(createMemRefToLLVMPass());

mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 add_mlir_dialect_library(MLIRSparseTensorTransforms
   BufferizableOpInterfaceImpl.cpp
   CodegenUtils.cpp
+  DenseBufferizationPass.cpp
   Sparsification.cpp
   SparseTensorConversion.cpp
   SparseTensorPasses.cpp
mlir/lib/Dialect/SparseTensor/Transforms/DenseBufferizationPass.cpp

Lines changed: 74 additions & 0 deletions (new file)
@@ -0,0 +1,74 @@
+//===- DenseBufferizationPass.cpp - Dense bufferization pass --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
+
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
+
+using namespace mlir;
+using namespace mlir::func;
+
+namespace mlir {
+namespace sparse_tensor {
+
+/// Return `true` if one of the given types is a sparse tensor type.
+static bool containsSparseTensor(TypeRange types) {
+  for (Type t : types)
+    if (getSparseTensorEncoding(t))
+      return true;
+  return false;
+}
+
+/// A pass that bufferizes only dense tensor ops and ignores all sparse tensor
+/// ops. No buffer copies are inserted. All tensor OpOperands must be
+/// inplacable.
+class BufferizeDenseOpsPass
+    : public PassWrapper<BufferizeDenseOpsPass, OperationPass<ModuleOp>> {
+public:
+  BufferizeDenseOpsPass(
+      const bufferization::OneShotBufferizationOptions &options)
+      : PassWrapper<BufferizeDenseOpsPass, OperationPass<ModuleOp>>(),
+        options(options) {}
+
+  void runOnOperation() override {
+    // Disallow all sparse tensor ops, so that only dense tensor ops are
+    // bufferized.
+    bufferization::OpFilter opFilter;
+    opFilter.allowOperation([&](Operation *op) {
+      if (containsSparseTensor(TypeRange(op->getResults())) ||
+          containsSparseTensor(TypeRange(op->getOperands())))
+        return false;
+      if (auto funcOp = dyn_cast<func::FuncOp>(op)) {
+        FunctionType funcType = funcOp.getFunctionType();
+        if (containsSparseTensor(funcType.getInputs()) ||
+            containsSparseTensor(funcType.getResults()))
+          return false;
+      }
+      return true;
+    });
+
+    if (failed(bufferization::bufferizeOp(getOperation(), options,
+                                          /*copyBeforeWrite=*/false,
+                                          &opFilter)))
+      signalPassFailure();
+  }
+
+private:
+  bufferization::OneShotBufferizationOptions options;
+};
+} // namespace sparse_tensor
+} // namespace mlir
+
+std::unique_ptr<Pass> mlir::createDenseBufferizationPass(
+    const bufferization::OneShotBufferizationOptions &options) {
+  return std::make_unique<mlir::sparse_tensor::BufferizeDenseOpsPass>(options);
+}
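
An illustrative sketch (not part of the commit): the new pass can also be added to a custom pipeline outside the sparse compiler. The helper name buildCustomPipeline is hypothetical; the option values mirror the ones set in SparseTensorPipelines.cpp.

  #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
  #include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
  #include "mlir/Pass/PassManager.h"

  static void buildCustomPipeline(mlir::OpPassManager &pm) {
    mlir::bufferization::OneShotBufferizationOptions options;
    options.bufferizeFunctionBoundaries = true;
    options.allowReturnAllocs = true;
    // Bufferizes dense tensor ops only; ops touching sparse tensors are
    // filtered out by the pass and left for SparseTensorConversion.
    pm.addPass(mlir::createDenseBufferizationPass(options));
  }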

mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp

Lines changed: 21 additions & 2 deletions
@@ -16,6 +16,7 @@

 #include "CodegenUtils.h"

+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -30,6 +31,8 @@

 using namespace mlir;
 using namespace mlir::sparse_tensor;
+using mlir::bufferization::BufferizableOpInterface;
+using mlir::bufferization::BufferizationDialect;

 namespace {

@@ -320,8 +323,8 @@ static Value genIndexAndValueForSparse(OpBuilder &builder, Location loc,
   return builder.create<tensor::ExtractOp>(loc, values, ivs[0]);
 }

-/// Generates code to allocate a tensor of the given type, and zero
-/// initialize it. If the tensor type has any dynamic sizes, then the
+/// Generates code to allocate a buffer of the given type, and zero
+/// initialize it. If the buffer type has any dynamic sizes, then the
 /// `sizes` parameter should be as filled by sizesFromPtr(); that way
 /// we can reuse the genDimSizeCall() results generated by sizesFromPtr().
 static Value allocDenseTensor(OpBuilder &builder, Location loc,
@@ -340,6 +343,11 @@ static Value allocDenseTensor(OpBuilder &builder, Location loc,
   return mem;
 }

+/// Generates code to deallocate a dense buffer.
+static void deallocDenseTensor(OpBuilder &builder, Location loc, Value buffer) {
+  builder.create<memref::DeallocOp>(loc, buffer);
+}
+
 /// Inserts the element returned by genGetNextCall(_, ind, elemPtr) into
 /// the tensor created by allocDenseTensor(). The `rank` is the rank
 /// of the `tensor` and the length of `ind`.
@@ -618,6 +626,9 @@ class SparseTensorAllocConverter
   LogicalResult
   matchAndRewrite(bufferization::AllocTensorOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
+    if (op.getCopy())
+      return rewriter.notifyMatchFailure(op,
+                                         "sparse tensor copy not implemented");
     RankedTensorType resType = op.getType();
     auto enc = getSparseTensorEncoding(resType);
     if (!enc)
@@ -743,6 +754,9 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
       Value iter = genNewCall(rewriter, op, params);
       Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
       Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
+      Block *insertionBlock = rewriter.getInsertionBlock();
+      // TODO: Dense buffers should be allocated/deallocated via the callback
+      // in BufferizationOptions.
      Value dst = allocDenseTensor(rewriter, loc, dstTensorTp, sizes);
       SmallVector<Value> noArgs;
       SmallVector<Type> noTypes;
@@ -758,6 +772,11 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
       rewriter.setInsertionPointAfter(whileOp);
       genDelCOOCall(rewriter, op, elemTp, iter);
       rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(op, resType, dst);
+      // Deallocate the buffer.
+      if (bufferization::allocationDoesNotEscape(op->getOpResult(0))) {
+        rewriter.setInsertionPoint(insertionBlock->getTerminator());
+        deallocDenseTensor(rewriter, loc, dst);
+      }
       return success();
     }
     if (!encDst && !encSrc) {

mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp

Lines changed: 7 additions & 1 deletion
@@ -127,11 +127,17 @@ struct SparseTensorConversionPass
     });
     // The following operations and dialects may be introduced by the
     // rewriting rules, and are therefore marked as legal.
-    target.addLegalOp<complex::ConstantOp, complex::NotEqualOp, linalg::FillOp,
+    target.addLegalOp<bufferization::ToMemrefOp, bufferization::ToTensorOp,
+                      complex::ConstantOp, complex::NotEqualOp, linalg::FillOp,
                       linalg::YieldOp, tensor::ExtractOp>();
     target.addLegalDialect<
         arith::ArithmeticDialect, bufferization::BufferizationDialect,
         LLVM::LLVMDialect, memref::MemRefDialect, scf::SCFDialect>();
+    target.addDynamicallyLegalOp<bufferization::AllocTensorOp>(
+        [&](bufferization::AllocTensorOp op) {
+          // Dense tensors are legal, sparse tensors are not.
+          return !static_cast<bool>(op.getType().getEncoding());
+        });
     // Translate strategy flags to strategy options.
     SparseTensorConversionOptions options(
         sparseToSparseConversionStrategy(sparseToSparse));

mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp

Lines changed: 12 additions & 42 deletions
@@ -308,17 +308,6 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
   return true;
 }

-/// Returns true if tensor has an in-place annotation.
-static bool isInPlace(Value val) {
-  if (auto arg = val.dyn_cast<BlockArgument>())
-    if (auto funcOp = dyn_cast<func::FuncOp>(arg.getOwner()->getParentOp()))
-      if (auto attr = funcOp.getArgAttrOfType<BoolAttr>(
-              arg.getArgNumber(),
-              bufferization::BufferizableOpInterface::kInplaceableAttrName))
-        return attr.getValue();
-  return false;
-}
-
 /// Returns true if tensor materializes uninitialized into the computation.
 static bool isMaterializing(Value val) {
   return val.getDefiningOp<linalg::InitTensorOp>() ||
@@ -355,9 +344,8 @@ static bool isAdmissableTensorExp(Merger &merger, linalg::GenericOp op,
     return true;
   // A tensor expression with a sparse output tensor that changes its values
   // but not its nonzero structure, an operation called "simply dynamic" in
-  // [Bik96,Ch9], is also admissable without special codegen, provided
-  // the tensor's underlying sparse storage scheme can be modified in-place.
-  if (merger.isSingleCondition(tensor, exp) && isInPlace(lhs->get()))
+  // [Bik96,Ch9], is also admissable without special codegen.
+  if (merger.isSingleCondition(tensor, exp))
     return true;
   // Accept "truly dynamic" if the output tensor materializes uninitialized
   // into the computation and insertions occur in lexicographic index order.
@@ -486,37 +474,19 @@ static Value genOutputBuffer(CodeGen &codegen, OpBuilder &builder,
   OpOperand *lhs = op.getOutputOperand(0);
   Value tensor = lhs->get();
   bool isInit = op.isInitTensor(lhs);
-  // An output tensor that is in-place can simply materialize from the buffer
-  // of the tensor that appears in the outs() clause. For updates, this has
-  // the advantage that only the nonzero value are involved in the computation,
-  // keeping the operation O(nnz). In all other cases, we are forced to zero
-  // out the buffer to enforce the assumption above, which may negatively
-  // impact running complexity (viz. O(n^2 + nnz) vs. O(nnz) for matrices).
+  // An output tensor can simply materialize from the buffer of the tensor that
+  // appears in the outs() clause. For updates, this has the advantage that only
+  // the nonzero value are involved in the computation, keeping the operation
+  // O(nnz). In all other cases, we are forced to zero out the buffer to enforce
+  // the assumption above, which may negatively impact running complexity
+  // (viz. O(n^2 + nnz) vs. O(nnz) for matrices).
   // TODO: use better analysis to avoid zeroing out the buffer?
-  if (isInPlace(tensor)) {
-    Value init =
-        builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
-    if (!isInit) {
-      Value zero = constantZero(builder, loc, denseTp.getElementType());
-      builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{init});
-    }
-    return init;
-  }
-  // By default, a new buffer is allocated which is either set to zero (when
-  // no updates occur or the tensor materializes into this computation) or
-  // initialized to the value of the tensor defined in the outs() clause.
-  // This is always correct (since it enforces all assumptions above) but
-  // may negatively impact running complexity as explained above.
-  Value alloc = builder.create<memref::AllocOp>(loc, denseTp, args);
-  if (!isInit || isMaterializing(tensor)) {
+  Value init = builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
+  if (!isInit) {
     Value zero = constantZero(builder, loc, denseTp.getElementType());
-    builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{alloc});
-  } else {
-    Value init =
-        builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
-    builder.create<memref::CopyOp>(loc, init, alloc);
+    builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{init});
   }
-  return alloc;
+  return init;
 }

 /// Local bufferization of all dense and sparse data structures.
