Skip to content

Commit a80f1f7

Browse files
jeanPerier authored and yuxuanchen1997 committed
[flang] handle alloca outside of entry blocks in MemoryAllocation (#98457)
Summary: This patch generalizes the MemoryAllocation pass (alloca -> heap) to handle fir.alloca regardless of their position in the IR. Until now, it only dealt with fir.alloca in function entry blocks. The logic is placed in a utility that can be used to replace alloca in an operation on demand with whatever kind of allocation the utility user wants via callbacks (allocmem, or custom runtime calls to instrument the code...). To do so, a concept of ownership, which was already somewhat implied and used in passes like stack-reclaim, is formalized. Any operation with the LoopLikeInterface, AutomaticAllocationScope, or IsolatedFromAbove owns the alloca directly nested inside its regions, and they must not be used after the operation. The pass then looks for the exit points of regions with such interfaces, and uses them to insert deallocation. If dominance is not proved, the pass falls back to storing the new address into a C pointer variable created in the entry of the owning region, which allows inserting deallocation as needed, including near the alloca itself to avoid leaks when the alloca is executed multiple times due to block CFG loops. This should fix #88344. In a next step, I will try to refactor lowering a bit to introduce lifetime operations for alloca so that the deallocation points can be inserted as soon as possible. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251596
1 parent dcf264d commit a80f1f7

File tree

9 files changed

+610
-108
lines changed

9 files changed

+610
-108
lines changed

flang/include/flang/Optimizer/Builder/FIRBuilder.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ class ExtendedValue;
3838
class MutableBoxValue;
3939
class BoxValue;
4040

41+
/// Get the integer type with a pointer size.
/// The width is currently hard-coded: the 64-bit integer type of \p builder
/// is returned unconditionally (see the TODO in the body).
42+
inline mlir::Type getIntPtrType(mlir::OpBuilder &builder) {
43+
// TODO: Delay the need of such type until codegen or find a way to use
44+
// llvm::DataLayout::getPointerSizeInBits here.
45+
return builder.getI64Type();
46+
}
47+
4148
//===----------------------------------------------------------------------===//
4249
// FirOpBuilder
4350
//===----------------------------------------------------------------------===//
@@ -143,11 +150,7 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener {
143150

144151
/// Get the integer type whose bit width corresponds to the width of pointer
145152
/// types, or is bigger.
146-
mlir::Type getIntPtrType() {
147-
// TODO: Delay the need of such type until codegen or find a way to use
148-
// llvm::DataLayout::getPointerSizeInBits here.
149-
return getI64Type();
150-
}
153+
// Forwards to the free function fir::getIntPtrType, passing this builder.
mlir::Type getIntPtrType() { return fir::getIntPtrType(*this); }
151154

152155
/// Wrap `str` to a SymbolRefAttr.
153156
mlir::SymbolRefAttr getSymbolRefAttr(llvm::StringRef str) {
@@ -712,6 +715,11 @@ fir::BoxValue createBoxValue(fir::FirOpBuilder &builder, mlir::Location loc,
712715
mlir::Value createNullBoxProc(fir::FirOpBuilder &builder, mlir::Location loc,
713716
mlir::Type boxType);
714717

718+
/// Convert a value to a new type. Return the value directly if it has the right
719+
/// type.
/// Arguments, in order: the builder used to create the conversion, the
/// location given to it, the target type, and the value to convert.
720+
mlir::Value createConvert(mlir::OpBuilder &, mlir::Location, mlir::Type,
721+
mlir::Value);
722+
715723
/// Set internal linkage attribute on a function.
716724
void setInternalLinkage(mlir::func::FuncOp);
717725

flang/include/flang/Optimizer/Dialect/FIROps.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ def fir_AllocaOp : fir_Op<"alloca", [AttrSizedOperandSegments,
124124
Indeed, a user would likely expect a good Fortran compiler to perform such
125125
an optimization.
126126

127+
Stack allocations have a maximum lifetime concept: their uses must not
128+
exceed the lifetime of the closest parent operation with the
129+
AutomaticAllocationScope trait, IsIsolatedFromAbove trait, or
130+
LoopLikeOpInterface trait. This restriction is meant to ease the
131+
insertion of stack save and restore operations, and to ease the conversion
132+
of stack allocation into heap allocation.
133+
127134
Until Fortran 2018, procedures defaulted to non-recursive. A legal
128135
implementation could therefore convert stack allocations to global
129136
allocations. Such a conversion effectively adds the SAVE attribute to all
@@ -183,11 +190,17 @@ def fir_AllocaOp : fir_Op<"alloca", [AttrSizedOperandSegments,
183190
mlir::Type getAllocatedType();
184191
bool hasLenParams() { return !getTypeparams().empty(); }
185192
bool hasShapeOperands() { return !getShape().empty(); }
193+
/// True when this alloca has length parameters or shape operands.
bool isDynamic() {return hasLenParams() || hasShapeOperands();}
186194
unsigned numLenParams() { return getTypeparams().size(); }
187195
operand_range getLenParams() { return getTypeparams(); }
188196
unsigned numShapeOperands() { return getShape().size(); }
189197
operand_range getShapeOperands() { return getShape(); }
190198
static mlir::Type getRefTy(mlir::Type ty);
199+
/// Is this an operation that owns the alloca directly made in its region?
200+
static bool ownsNestedAlloca(mlir::Operation* op);
201+
/// Get the parent region that owns this alloca. Nullptr if none can be
202+
/// identified.
203+
mlir::Region* getOwnerRegion();
191204
}];
192205
}
193206

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//===-- Optimizer/Transforms/MemoryUtils.h ----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10+
//
11+
//===----------------------------------------------------------------------===//
12+
//
13+
// This file defines a utility to replace fir.alloca by dynamic allocation and
14+
// deallocation. The exact kind of dynamic allocation is left to be defined by
15+
// the utility user via callbacks (could be fir.allocmem or custom runtime
16+
// calls).
17+
//
18+
//===----------------------------------------------------------------------===//
19+
20+
#ifndef FORTRAN_OPTIMIZER_TRANSFORMS_MEMORYUTILS_H
21+
#define FORTRAN_OPTIMIZER_TRANSFORMS_MEMORYUTILS_H
22+
23+
#include "flang/Optimizer/Dialect/FIROps.h"
24+
25+
namespace mlir {
26+
class RewriterBase;
27+
}
28+
29+
namespace fir {
30+
31+
/// Type of callbacks that indicate if a given fir.alloca must be
32+
/// rewritten.
33+
using MustRewriteCallBack = llvm::function_ref<bool(fir::AllocaOp)>;
34+
35+
/// Type of callbacks that produce the replacement for a given fir.alloca.
36+
/// It is provided extra information about the dominance of the deallocation
37+
/// points that have been identified, and may refuse to replace the alloca,
38+
/// even if the MustRewriteCallBack previously returned true, in which case
39+
/// it should return a null value.
40+
/// The callback should not delete the alloca, the utility will do it.
41+
using AllocaRewriterCallBack = llvm::function_ref<mlir::Value(
42+
mlir::OpBuilder &, fir::AllocaOp, bool allocaDominatesDeallocLocations)>;
43+
/// Type of callbacks that must generate deallocation of storage obtained via
44+
/// AllocaRewriterCallBack calls.
45+
using DeallocCallBack =
46+
llvm::function_ref<void(mlir::Location, mlir::OpBuilder &, mlir::Value)>;
47+
48+
/// Utility to replace fir.alloca by dynamic allocations inside \p parentOp.
49+
/// \p MustRewriteCallBack lets the user control which fir.alloca should be
50+
/// replaced. \p AllocaRewriterCallBack lets the user define how the new memory
51+
/// should be allocated. \p DeallocCallBack lets the user decide how the memory
52+
/// should be deallocated. The boolean result indicates if the utility succeeded
53+
/// to replace all fir.alloca as requested by the user. Causes of failures are
54+
/// the presence of unregistered operations, or OpenMP/ACC recipe operations
55+
/// that return memory allocated inside their region.
56+
bool replaceAllocas(mlir::RewriterBase &rewriter, mlir::Operation *parentOp,
57+
MustRewriteCallBack, AllocaRewriterCallBack,
58+
DeallocCallBack);
59+
60+
} // namespace fir
61+
62+
#endif // FORTRAN_OPTIMIZER_TRANSFORMS_MEMORYUTILS_H

flang/lib/Optimizer/Builder/FIRBuilder.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -455,15 +455,21 @@ mlir::Value fir::FirOpBuilder::convertWithSemantics(
455455
return createConvert(loc, toTy, val);
456456
}
457457

458-
mlir::Value fir::FirOpBuilder::createConvert(mlir::Location loc,
459-
mlir::Type toTy, mlir::Value val) {
458+
/// Return \p val converted to \p toTy. When \p val already has type \p toTy it
/// is returned as is; otherwise a fir::ConvertOp is created with \p builder at
/// \p loc and its result is returned.
mlir::Value fir::factory::createConvert(mlir::OpBuilder &builder,
459+
mlir::Location loc, mlir::Type toTy,
460+
mlir::Value val) {
460461
if (val.getType() != toTy) {
461462
// The target type is asserted not to satisfy fir::isa_derived.
assert(!fir::isa_derived(toTy));
462-
return create<fir::ConvertOp>(loc, toTy, val);
463+
return builder.create<fir::ConvertOp>(loc, toTy, val);
463464
}
464465
return val;
465466
}
466467

468+
/// Member wrapper that forwards to fir::factory::createConvert, using this
/// builder as the mlir::OpBuilder argument.
mlir::Value fir::FirOpBuilder::createConvert(mlir::Location loc,
469+
mlir::Type toTy, mlir::Value val) {
470+
return fir::factory::createConvert(*this, loc, toTy, val);
471+
}
472+
467473
void fir::FirOpBuilder::createStoreWithConvert(mlir::Location loc,
468474
mlir::Value val,
469475
mlir::Value addr) {

flang/lib/Optimizer/Dialect/FIROps.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,27 @@ llvm::LogicalResult fir::AllocaOp::verify() {
275275
return mlir::success();
276276
}
277277

278+
/// Return true if \p op has the IsIsolatedFromAbove trait, has the
/// AutomaticAllocationScope trait, or implements LoopLikeOpInterface.
bool fir::AllocaOp::ownsNestedAlloca(mlir::Operation *op) {
279+
return op->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>() ||
280+
op->hasTrait<mlir::OpTrait::AutomaticAllocationScope>() ||
281+
mlir::isa<mlir::LoopLikeOpInterface>(*op);
282+
}
283+
284+
/// Walk up the chain of parent operations, starting from this alloca, until a
/// parent for which ownsNestedAlloca() holds is found, and return the region
/// of that parent through which the alloca is reached. Return nullptr when an
/// unregistered parent is met first, or when no parent qualifies.
mlir::Region *fir::AllocaOp::getOwnerRegion() {
285+
mlir::Operation *currentOp = getOperation();
286+
while (mlir::Operation *parentOp = currentOp->getParentOp()) {
287+
// If the operation was not registered, inquiries about its traits will be
288+
// incorrect and it is not possible to reason about the operation. This
289+
// should not happen in a normal Fortran compilation flow, but be foolproof.
290+
if (!parentOp->isRegistered())
291+
return nullptr;
292+
if (fir::AllocaOp::ownsNestedAlloca(parentOp))
293+
// currentOp is the alloca itself or one of its ancestors, and is directly
// nested in parentOp: its parent region is the region of parentOp that
// holds the alloca.
return currentOp->getParentRegion();
294+
currentOp = parentOp;
295+
}
296+
return nullptr;
297+
}
298+
278299
//===----------------------------------------------------------------------===//
279300
// AllocMemOp
280301
//===----------------------------------------------------------------------===//

flang/lib/Optimizer/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_flang_library(FIRTransforms
1010
ControlFlowConverter.cpp
1111
ArrayValueCopy.cpp
1212
ExternalNameConversion.cpp
13+
MemoryUtils.cpp
1314
MemoryAllocation.cpp
1415
StackArrays.cpp
1516
MemRefDataFlowOpt.cpp

flang/lib/Optimizer/Transforms/MemoryAllocation.cpp

Lines changed: 43 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "flang/Optimizer/Dialect/FIRDialect.h"
1010
#include "flang/Optimizer/Dialect/FIROps.h"
1111
#include "flang/Optimizer/Dialect/FIRType.h"
12+
#include "flang/Optimizer/Transforms/MemoryUtils.h"
1213
#include "flang/Optimizer/Transforms/Passes.h"
1314
#include "mlir/Dialect/Func/IR/FuncOps.h"
1415
#include "mlir/IR/Diagnostics.h"
@@ -27,50 +28,18 @@ namespace fir {
2728
// Number of elements in an array does not determine where it is allocated.
2829
static constexpr std::size_t unlimitedArraySize = ~static_cast<std::size_t>(0);
2930

30-
namespace {
31-
class ReturnAnalysis {
32-
public:
33-
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ReturnAnalysis)
34-
35-
ReturnAnalysis(mlir::Operation *op) {
36-
if (auto func = mlir::dyn_cast<mlir::func::FuncOp>(op))
37-
for (mlir::Block &block : func)
38-
for (mlir::Operation &i : block)
39-
if (mlir::isa<mlir::func::ReturnOp>(i)) {
40-
returnMap[op].push_back(&i);
41-
break;
42-
}
43-
}
44-
45-
llvm::SmallVector<mlir::Operation *> getReturns(mlir::Operation *func) const {
46-
auto iter = returnMap.find(func);
47-
if (iter != returnMap.end())
48-
return iter->second;
49-
return {};
50-
}
51-
52-
private:
53-
llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>
54-
returnMap;
55-
};
56-
} // namespace
57-
5831
/// Return `true` if this allocation is to remain on the stack (`fir.alloca`).
5932
/// Otherwise the allocation should be moved to the heap (`fir.allocmem`).
6033
static inline bool
61-
keepStackAllocation(fir::AllocaOp alloca, mlir::Block *entry,
34+
keepStackAllocation(fir::AllocaOp alloca,
6235
const fir::MemoryAllocationOptOptions &options) {
63-
// Limitation: only arrays allocated on the stack in the entry block are
64-
// considered for now.
65-
// TODO: Generalize the algorithm and placement of the freemem nodes.
66-
if (alloca->getBlock() != entry)
67-
return true;
36+
// Move all arrays and characters with runtime determined size to the heap.
37+
if (options.dynamicArrayOnHeap && alloca.isDynamic())
38+
return false;
39+
// TODO: use data layout to reason in terms of byte size to cover all "big"
40+
// entities, which may be scalar derived types.
6841
if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(alloca.getInType())) {
69-
if (fir::hasDynamicSize(seqTy)) {
70-
// Move all arrays with runtime determined size to the heap.
71-
if (options.dynamicArrayOnHeap)
72-
return false;
73-
} else {
42+
if (!fir::hasDynamicSize(seqTy)) {
7443
std::int64_t numberOfElements = 1;
7544
for (std::int64_t i : seqTy.getShape()) {
7645
numberOfElements *= i;
@@ -82,58 +51,37 @@ keepStackAllocation(fir::AllocaOp alloca, mlir::Block *entry,
8251
// the heap.
8352
if (static_cast<std::size_t>(numberOfElements) >
8453
options.maxStackArraySize) {
85-
LLVM_DEBUG(llvm::dbgs()
86-
<< "memory allocation opt: found " << alloca << '\n');
8754
return false;
8855
}
8956
}
9057
}
9158
return true;
9259
}
9360

94-
namespace {
95-
class AllocaOpConversion : public mlir::OpRewritePattern<fir::AllocaOp> {
96-
public:
97-
using OpRewritePattern::OpRewritePattern;
98-
99-
AllocaOpConversion(mlir::MLIRContext *ctx,
100-
llvm::ArrayRef<mlir::Operation *> rets)
101-
: OpRewritePattern(ctx), returnOps(rets) {}
102-
103-
llvm::LogicalResult
104-
matchAndRewrite(fir::AllocaOp alloca,
105-
mlir::PatternRewriter &rewriter) const override {
106-
auto loc = alloca.getLoc();
107-
mlir::Type varTy = alloca.getInType();
108-
auto unpackName =
109-
[](std::optional<llvm::StringRef> opt) -> llvm::StringRef {
110-
if (opt)
111-
return *opt;
112-
return {};
113-
};
114-
auto uniqName = unpackName(alloca.getUniqName());
115-
auto bindcName = unpackName(alloca.getBindcName());
116-
auto heap = rewriter.create<fir::AllocMemOp>(
117-
loc, varTy, uniqName, bindcName, alloca.getTypeparams(),
118-
alloca.getShape());
119-
auto insPt = rewriter.saveInsertionPoint();
120-
for (mlir::Operation *retOp : returnOps) {
121-
rewriter.setInsertionPoint(retOp);
122-
[[maybe_unused]] auto free = rewriter.create<fir::FreeMemOp>(loc, heap);
123-
LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: add free " << free
124-
<< " for " << heap << '\n');
125-
}
126-
rewriter.restoreInsertionPoint(insPt);
127-
rewriter.replaceOpWithNewOp<fir::ConvertOp>(
128-
alloca, fir::ReferenceType::get(varTy), heap);
129-
LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: replaced " << alloca
130-
<< " with " << heap << '\n');
131-
return mlir::success();
132-
}
61+
/// Create a fir::AllocMemOp with \p builder that mirrors \p alloca: same
/// location, in-type, unique name, bind(c) name, type parameters and shape
/// operands. The new operation is returned; \p alloca itself is not modified
/// here.
/// NOTE(review): the third parameter is not read in this function, and its
/// name differs from the `allocaDominatesDeallocLocations` name used by
/// fir::AllocaRewriterCallBack -- confirm which wording is intended.
static mlir::Value genAllocmem(mlir::OpBuilder &builder, fir::AllocaOp alloca,
62+
bool deallocPointsDominateAlloc) {
63+
mlir::Type varTy = alloca.getInType();
64+
// Map an absent optional name to an empty StringRef.
auto unpackName = [](std::optional<llvm::StringRef> opt) -> llvm::StringRef {
65+
if (opt)
66+
return *opt;
67+
return {};
68+
};
69+
llvm::StringRef uniqName = unpackName(alloca.getUniqName());
70+
llvm::StringRef bindcName = unpackName(alloca.getBindcName());
71+
auto heap = builder.create<fir::AllocMemOp>(alloca.getLoc(), varTy, uniqName,
72+
bindcName, alloca.getTypeparams(),
73+
alloca.getShape());
74+
LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: replaced " << alloca
75+
<< " with " << heap << '\n');
76+
return heap;
77+
}
13378

134-
private:
135-
llvm::ArrayRef<mlir::Operation *> returnOps;
136-
};
79+
/// Create a fir::FreeMemOp on \p allocmem with \p builder at \p loc.
static void genFreemem(mlir::Location loc, mlir::OpBuilder &builder,
80+
mlir::Value allocmem) {
81+
// `free` is only read by the LLVM_DEBUG output below, hence [[maybe_unused]].
[[maybe_unused]] auto free = builder.create<fir::FreeMemOp>(loc, allocmem);
82+
LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: add free " << free
83+
<< " for " << allocmem << '\n');
84+
}
13785

13886
/// This pass can reclassify memory allocations (fir.alloca, fir.allocmem) based
13987
/// on heuristics and settings. The intention is to allow better performance and
@@ -144,6 +92,7 @@ class AllocaOpConversion : public mlir::OpRewritePattern<fir::AllocaOp> {
14492
/// make it a heap allocation.
14593
/// 2. If a stack allocation is an array with a runtime evaluated size make
14694
/// it a heap allocation.
95+
namespace {
14796
class MemoryAllocationOpt
14897
: public fir::impl::MemoryAllocationOptBase<MemoryAllocationOpt> {
14998
public:
@@ -184,23 +133,17 @@ class MemoryAllocationOpt
184133
// If func is a declaration, skip it.
185134
if (func.empty())
186135
return;
187-
188-
const auto &analysis = getAnalysis<ReturnAnalysis>();
189-
190-
target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect,
191-
mlir::func::FuncDialect>();
192-
target.addDynamicallyLegalOp<fir::AllocaOp>([&](fir::AllocaOp alloca) {
193-
return keepStackAllocation(alloca, &func.front(), options);
194-
});
195-
196-
llvm::SmallVector<mlir::Operation *> returnOps = analysis.getReturns(func);
197-
patterns.insert<AllocaOpConversion>(context, returnOps);
198-
if (mlir::failed(
199-
mlir::applyPartialConversion(func, target, std::move(patterns)))) {
200-
mlir::emitError(func.getLoc(),
201-
"error in memory allocation optimization\n");
202-
signalPassFailure();
203-
}
136+
auto tryReplacing = [&](fir::AllocaOp alloca) {
137+
bool res = !keepStackAllocation(alloca, options);
138+
if (res) {
139+
LLVM_DEBUG(llvm::dbgs()
140+
<< "memory allocation opt: found " << alloca << '\n');
141+
}
142+
return res;
143+
};
144+
mlir::IRRewriter rewriter(context);
145+
fir::replaceAllocas(rewriter, func.getOperation(), tryReplacing,
146+
genAllocmem, genFreemem);
204147
}
205148

206149
private:

0 commit comments

Comments
 (0)