Skip to content

[flang][cuda] Use fir.cuda_deallocate for automatic deallocation #89662

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion flang/include/flang/Lower/Allocatable.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,14 @@ void genDeallocateStmt(AbstractConverter &converter,

void genDeallocateBox(AbstractConverter &converter,
const fir::MutableBoxValue &box, mlir::Location loc,
const Fortran::semantics::Symbol *sym = nullptr,
mlir::Value declaredTypeDesc = {});

/// Deallocate an allocatable if it is allocated at the end of its lifetime.
void genDeallocateIfAllocated(AbstractConverter &converter,
const fir::MutableBoxValue &box,
mlir::Location loc);
mlir::Location loc,
const Fortran::semantics::Symbol *sym = nullptr);

/// Create a MutableBoxValue for an allocatable or pointer entity.
/// If the variables is a local variable that is not a dummy, it will be
Expand Down
12 changes: 7 additions & 5 deletions flang/lib/Lower/Allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,18 +859,20 @@ genDeallocate(fir::FirOpBuilder &builder,
void Fortran::lower::genDeallocateBox(
Fortran::lower::AbstractConverter &converter,
const fir::MutableBoxValue &box, mlir::Location loc,
mlir::Value declaredTypeDesc) {
const Fortran::semantics::Symbol *sym, mlir::Value declaredTypeDesc) {
const Fortran::lower::SomeExpr *statExpr = nullptr;
const Fortran::lower::SomeExpr *errMsgExpr = nullptr;
ErrorManager errorManager;
errorManager.init(converter, loc, statExpr, errMsgExpr);
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
genDeallocate(builder, converter, loc, box, errorManager, declaredTypeDesc);
genDeallocate(builder, converter, loc, box, errorManager, declaredTypeDesc,
sym);
}

void Fortran::lower::genDeallocateIfAllocated(
Fortran::lower::AbstractConverter &converter,
const fir::MutableBoxValue &box, mlir::Location loc) {
const fir::MutableBoxValue &box, mlir::Location loc,
const Fortran::semantics::Symbol *sym) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Value isAllocated =
fir::factory::genIsAllocatedOrAssociatedTest(builder, loc, box);
Expand All @@ -880,9 +882,9 @@ void Fortran::lower::genDeallocateIfAllocated(
eleType.isa<fir::RecordType>() && box.isPolymorphic()) {
mlir::Value declaredTypeDesc = builder.create<fir::TypeDescOp>(
loc, mlir::TypeAttr::get(eleType));
genDeallocateBox(converter, box, loc, declaredTypeDesc);
genDeallocateBox(converter, box, loc, sym, declaredTypeDesc);
} else {
genDeallocateBox(converter, box, loc);
genDeallocateBox(converter, box, loc, sym);
}
})
.end();
Expand Down
5 changes: 3 additions & 2 deletions flang/lib/Lower/ConvertVariable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,13 +916,14 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
break;
case VariableCleanUp::Deallocate:
auto *converterPtr = &converter;
converter.getFctCtx().attachCleanup([converterPtr, loc, exv]() {
auto *sym = &var.getSymbol();
converter.getFctCtx().attachCleanup([converterPtr, loc, exv, sym]() {
const fir::MutableBoxValue *mutableBox =
exv.getBoxOf<fir::MutableBoxValue>();
assert(mutableBox &&
"trying to deallocate entity not lowered as allocatable");
Fortran::lower::genDeallocateIfAllocated(*converterPtr, *mutableBox,
loc);
loc, sym);
});
}
}
Expand Down
32 changes: 19 additions & 13 deletions flang/lib/Lower/OpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4187,21 +4187,27 @@ void Fortran::lower::attachDeclarePostDeallocAction(

std::stringstream fctName;
fctName << converter.mangleName(sym) << declarePostDeallocSuffix.str();
mlir::Operation &op = builder.getInsertionBlock()->back();
if (op.hasAttr(mlir::acc::getDeclareActionAttrName())) {
auto attr = op.getAttrOfType<mlir::acc::DeclareActionAttr>(
mlir::Operation *op = &builder.getInsertionBlock()->back();
if (auto resOp = mlir::dyn_cast<fir::ResultOp>(*op)) {
assert(resOp.getOperands().size() == 0 &&
"expect only fir.result op with no operand");
op = op->getPrevNode();
}
assert(op && "expect operation to attach the post deallocation action");
if (op->hasAttr(mlir::acc::getDeclareActionAttrName())) {
auto attr = op->getAttrOfType<mlir::acc::DeclareActionAttr>(
mlir::acc::getDeclareActionAttrName());
op.setAttr(mlir::acc::getDeclareActionAttrName(),
mlir::acc::DeclareActionAttr::get(
builder.getContext(), attr.getPreAlloc(),
attr.getPostAlloc(), attr.getPreDealloc(),
/*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
op->setAttr(mlir::acc::getDeclareActionAttrName(),
mlir::acc::DeclareActionAttr::get(
builder.getContext(), attr.getPreAlloc(),
attr.getPostAlloc(), attr.getPreDealloc(),
/*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
} else {
op.setAttr(mlir::acc::getDeclareActionAttrName(),
mlir::acc::DeclareActionAttr::get(
builder.getContext(),
/*preAlloc=*/{}, /*postAlloc=*/{}, /*preDealloc=*/{},
/*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
op->setAttr(mlir::acc::getDeclareActionAttrName(),
mlir::acc::DeclareActionAttr::get(
builder.getContext(),
/*preAlloc=*/{}, /*postAlloc=*/{}, /*preDealloc=*/{},
/*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
}
}

Expand Down
30 changes: 30 additions & 0 deletions flang/test/Lower/CUDA/cuda-allocatable.cuf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ end subroutine

! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32

! CHECK: %[[BOX_LOAD:.*]] = fir.load %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[BOX_LOAD]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
! CHECK: %[[ADDR_I64:.*]] = fir.convert %[[ADDR]] : (!fir.heap<!fir.array<?xf32>>) -> i64
! CHECK: %[[C0:.*]] = arith.constant 0 : i64
! CHECK: %[[NE_C0:.*]] = arith.cmpi ne, %[[ADDR_I64]], %[[C0]] : i64
! CHECK: fir.if %[[NE_C0]] {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: }

subroutine sub2()
real, allocatable, managed :: a(:)
integer :: istat
Expand All @@ -37,6 +46,10 @@ end subroutine
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>

! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>} -> i32
! CHECK: }

subroutine sub3()
integer, allocatable, pinned :: a(:,:)
logical :: plog
Expand All @@ -50,6 +63,9 @@ end subroutine
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
! CHECK-2: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {cuda_attr = #fir.cuda<pinned>} -> i32
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {cuda_attr = #fir.cuda<pinned>} -> i32
! CHECK: }

subroutine sub4()
real, allocatable, device :: a(:)
Expand All @@ -65,6 +81,9 @@ end subroutine
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: }

subroutine sub5()
real, allocatable, device :: a(:)
Expand All @@ -80,6 +99,11 @@ end subroutine
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: fir.if
! CHECK: fir.freemem
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: }

subroutine sub6()
real, allocatable, device :: a(:)
Expand All @@ -95,6 +119,9 @@ end subroutine
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.call @_FortranAAllocatableApplyMold
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: }

subroutine sub7()
real, allocatable, device :: a(:)
Expand All @@ -120,3 +147,6 @@ end subroutine
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: }
5 changes: 5 additions & 0 deletions flang/test/Lower/OpenACC/acc-declare.f90
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ subroutine acc_declare_allocate()
! CHECK: fir.freemem %{{.*}} : !fir.heap<!fir.array<?xi32>>
! CHECK: fir.store %{{.*}} to %{{.*}} {acc.declare_action = #acc.declare_action<postDealloc = @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_dealloc>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>

! CHECK: fir.if
! CHECK: fir.freemem %{{.*}} : !fir.heap<!fir.array<?xi32>>
! CHECK: fir.store %{{.*}} to %{{.*}}#1 {acc.declare_action = #acc.declare_action<postDealloc = @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_dealloc>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: }

end subroutine

! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(
Expand Down