Skip to content

[flang] Fixed StackArrays assertion after #121919. #122550

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 31 additions & 23 deletions flang/lib/Optimizer/Transforms/StackArrays.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,15 +198,19 @@ class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {

/// Determine where to insert the alloca operation. The returned value should
/// be checked to see if it is inside a loop
static InsertionPoint findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc);
static InsertionPoint
findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc,
const llvm::SmallVector<mlir::Operation *> &freeOps);

private:
/// Handle to the DFA (already run)
const StackArraysAnalysisWrapper::AllocMemMap &candidateOps;

/// If we failed to find an insertion point not inside a loop, see if it would
/// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop
static InsertionPoint findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc);
static InsertionPoint findAllocaLoopInsertionPoint(
fir::AllocMemOp &oldAlloc,
const llvm::SmallVector<mlir::Operation *> &freeOps);

/// Returns the alloca if it was successfully inserted, otherwise {}
std::optional<fir::AllocaOp>
Expand Down Expand Up @@ -484,14 +488,31 @@ StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
llvm::DenseSet<mlir::Value> freedValues;
point.appendFreedValues(freedValues);

// Find all fir.freemem operations corresponding to fir.allocmem
// in freedValues. It is best to find the association going back
// from fir.freemem to fir.allocmem through the def-use chains,
// so that we can use lookThroughDeclaresAndConverts same way
// the AllocationAnalysis is handling them.
llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>
allocToFreeMemMap;
func->walk([&](fir::FreeMemOp freeOp) {
mlir::Value memref = lookThroughDeclaresAndConverts(freeOp.getHeapref());
if (!freedValues.count(memref))
return;

auto allocMem = memref.getDefiningOp<fir::AllocMemOp>();
allocToFreeMemMap[allocMem].push_back(freeOp);
});

// We only replace allocations which are definately freed on all routes
// through the function because otherwise the allocation may have an intende
// lifetime longer than the current stack frame (e.g. a heap allocation which
// is then freed by another function).
for (mlir::Value freedValue : freedValues) {
fir::AllocMemOp allocmem = freedValue.getDefiningOp<fir::AllocMemOp>();
InsertionPoint insertionPoint =
AllocMemConversion::findAllocaInsertionPoint(allocmem);
AllocMemConversion::findAllocaInsertionPoint(
allocmem, allocToFreeMemMap[allocmem]);
if (insertionPoint)
candidateOps.insert({allocmem, insertionPoint});
}
Expand Down Expand Up @@ -578,8 +599,9 @@ static bool isInLoop(mlir::Operation *op) {
op->getParentOfType<mlir::LoopLikeOpInterface>();
}

InsertionPoint
AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) {
InsertionPoint AllocMemConversion::findAllocaInsertionPoint(
fir::AllocMemOp &oldAlloc,
const llvm::SmallVector<mlir::Operation *> &freeOps) {
// Ideally the alloca should be inserted at the end of the function entry
// block so that we do not allocate stack space in a loop. However,
// the operands to the alloca may not be available that early, so insert it
Expand All @@ -596,7 +618,7 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) {
if (isInLoop(oldAllocOp)) {
// where we want to put it is in a loop, and even the old location is in
// a loop. Give up.
return findAllocaLoopInsertionPoint(oldAlloc);
return findAllocaLoopInsertionPoint(oldAlloc, freeOps);
}
return {oldAllocOp};
}
Expand Down Expand Up @@ -657,28 +679,14 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) {
return checkReturn(&entryBlock);
}

InsertionPoint
AllocMemConversion::findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc) {
InsertionPoint AllocMemConversion::findAllocaLoopInsertionPoint(
fir::AllocMemOp &oldAlloc,
const llvm::SmallVector<mlir::Operation *> &freeOps) {
mlir::Operation *oldAllocOp = oldAlloc;
// This is only called as a last resort. We should try to insert at the
// location of the old allocation, which is inside of a loop, using
// llvm.stacksave/llvm.stackrestore

// find freemem ops
llvm::SmallVector<mlir::Operation *, 1> freeOps;

for (mlir::Operation *user : oldAllocOp->getUsers()) {
if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
for (mlir::Operation *user : declareOp->getUsers()) {
if (mlir::isa<fir::FreeMemOp>(user))
freeOps.push_back(user);
}
}

if (mlir::isa<fir::FreeMemOp>(user))
freeOps.push_back(user);
}

assert(freeOps.size() && "DFA should only return freed memory");

// Don't attempt to reason about a stacksave/stackrestore between different
Expand Down
42 changes: 42 additions & 0 deletions flang/test/Transforms/stack-arrays.fir
Original file line number Diff line number Diff line change
Expand Up @@ -418,3 +418,45 @@ func.func @lookthrough() {
// CHECK: func.func @lookthrough() {
// CHECK: fir.alloca !fir.array<42xi32>
// CHECK-NOT: fir.freemem

// StackArrays is better to find fir.freemem ops corresponding to fir.allocmem
// using the same look through mechanism as during the allocation analysis,
// looking through fir.convert and fir.declare.
func.func @finding_freemem_in_block() {
%c0 = arith.constant 0 : index
%c10_i32 = arith.constant 10 : i32
%c1_i32 = arith.constant 1 : i32
%0 = fir.alloca i32 {bindc_name = "k", uniq_name = "k"}
%1 = fir.declare %0 {uniq_name = "k"} : (!fir.ref<i32>) -> !fir.ref<i32>
fir.store %c1_i32 to %1 : !fir.ref<i32>
cf.br ^bb1
^bb1: // 2 preds: ^bb0, ^bb2
%2 = fir.load %1 : !fir.ref<i32>
%3 = arith.cmpi sle, %2, %c10_i32 : i32
cf.cond_br %3, ^bb2, ^bb3
^bb2: // pred: ^bb1
%4 = fir.declare %1 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "x"} : (!fir.ref<i32>) -> !fir.ref<i32>
%5 = fir.load %4 : !fir.ref<i32>
%6 = fir.convert %5 : (i32) -> index
%7 = arith.cmpi sgt, %6, %c0 : index
%8 = arith.select %7, %6, %c0 : index
%9 = fir.shape %8 : (index) -> !fir.shape<1>
%10 = fir.allocmem !fir.array<?xi32>, %8 {bindc_name = ".tmp.expr_result", uniq_name = ""}
%11 = fir.convert %10 : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
%12 = fir.declare %11(%9) {uniq_name = ".tmp.expr_result"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xi32>>
%13 = fir.embox %12(%9) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
%14 = fir.call @_QPfunc(%1) fastmath<fast> : (!fir.ref<i32>) -> !fir.array<?xi32>
fir.save_result %14 to %12(%9) : !fir.array<?xi32>, !fir.ref<!fir.array<?xi32>>, !fir.shape<1>
fir.call @_QPsub(%13) fastmath<fast> : (!fir.box<!fir.array<?xi32>>) -> ()
%15 = fir.convert %12 : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
fir.freemem %15 : !fir.heap<!fir.array<?xi32>>
%16 = fir.load %1 : !fir.ref<i32>
%17 = arith.addi %16, %c1_i32 : i32
fir.store %17 to %1 : !fir.ref<i32>
cf.br ^bb1
^bb3: // pred: ^bb1
return
}
// CHECK: func.func @finding_freemem_in_block() {
// CHECK: fir.alloca !fir.array<?xi32>
// CHECK-NOT: fir.freemem
Loading