Skip to content

Commit 7c0da79

Browse files
authored
[flang][cuda] Use fir.cuda_deallocate for automatic deallocation (llvm#89662)
Automatic deallocation of allocatables that are CUDA device variables must use the fir.cuda_deallocate operation. This patch updates the automatic deallocation code generation to use this operation when the variable is a CUDA variable. This patch also has the side effect of correctly calling `attachDeclarePostDeallocAction` for OpenACC declare variables on automatic deallocation. It also updates the code in `attachDeclarePostDeallocAction` so that the action is attached not to fir.result but to the correct last op.
1 parent 31e769c commit 7c0da79

File tree

6 files changed

+67
-21
lines changed

6 files changed

+67
-21
lines changed

flang/include/flang/Lower/Allocatable.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,14 @@ void genDeallocateStmt(AbstractConverter &converter,
5555

5656
void genDeallocateBox(AbstractConverter &converter,
5757
const fir::MutableBoxValue &box, mlir::Location loc,
58+
const Fortran::semantics::Symbol *sym = nullptr,
5859
mlir::Value declaredTypeDesc = {});
5960

6061
/// Deallocate an allocatable if it is allocated at the end of its lifetime.
6162
void genDeallocateIfAllocated(AbstractConverter &converter,
6263
const fir::MutableBoxValue &box,
63-
mlir::Location loc);
64+
mlir::Location loc,
65+
const Fortran::semantics::Symbol *sym = nullptr);
6466

6567
/// Create a MutableBoxValue for an allocatable or pointer entity.
6668
/// If the variables is a local variable that is not a dummy, it will be

flang/lib/Lower/Allocatable.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -859,18 +859,20 @@ genDeallocate(fir::FirOpBuilder &builder,
859859
void Fortran::lower::genDeallocateBox(
860860
Fortran::lower::AbstractConverter &converter,
861861
const fir::MutableBoxValue &box, mlir::Location loc,
862-
mlir::Value declaredTypeDesc) {
862+
const Fortran::semantics::Symbol *sym, mlir::Value declaredTypeDesc) {
863863
const Fortran::lower::SomeExpr *statExpr = nullptr;
864864
const Fortran::lower::SomeExpr *errMsgExpr = nullptr;
865865
ErrorManager errorManager;
866866
errorManager.init(converter, loc, statExpr, errMsgExpr);
867867
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
868-
genDeallocate(builder, converter, loc, box, errorManager, declaredTypeDesc);
868+
genDeallocate(builder, converter, loc, box, errorManager, declaredTypeDesc,
869+
sym);
869870
}
870871

871872
void Fortran::lower::genDeallocateIfAllocated(
872873
Fortran::lower::AbstractConverter &converter,
873-
const fir::MutableBoxValue &box, mlir::Location loc) {
874+
const fir::MutableBoxValue &box, mlir::Location loc,
875+
const Fortran::semantics::Symbol *sym) {
874876
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
875877
mlir::Value isAllocated =
876878
fir::factory::genIsAllocatedOrAssociatedTest(builder, loc, box);
@@ -880,9 +882,9 @@ void Fortran::lower::genDeallocateIfAllocated(
880882
eleType.isa<fir::RecordType>() && box.isPolymorphic()) {
881883
mlir::Value declaredTypeDesc = builder.create<fir::TypeDescOp>(
882884
loc, mlir::TypeAttr::get(eleType));
883-
genDeallocateBox(converter, box, loc, declaredTypeDesc);
885+
genDeallocateBox(converter, box, loc, sym, declaredTypeDesc);
884886
} else {
885-
genDeallocateBox(converter, box, loc);
887+
genDeallocateBox(converter, box, loc, sym);
886888
}
887889
})
888890
.end();

flang/lib/Lower/ConvertVariable.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -916,13 +916,14 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
916916
break;
917917
case VariableCleanUp::Deallocate:
918918
auto *converterPtr = &converter;
919-
converter.getFctCtx().attachCleanup([converterPtr, loc, exv]() {
919+
auto *sym = &var.getSymbol();
920+
converter.getFctCtx().attachCleanup([converterPtr, loc, exv, sym]() {
920921
const fir::MutableBoxValue *mutableBox =
921922
exv.getBoxOf<fir::MutableBoxValue>();
922923
assert(mutableBox &&
923924
"trying to deallocate entity not lowered as allocatable");
924925
Fortran::lower::genDeallocateIfAllocated(*converterPtr, *mutableBox,
925-
loc);
926+
loc, sym);
926927
});
927928
}
928929
}

flang/lib/Lower/OpenACC.cpp

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4187,21 +4187,27 @@ void Fortran::lower::attachDeclarePostDeallocAction(
41874187

41884188
std::stringstream fctName;
41894189
fctName << converter.mangleName(sym) << declarePostDeallocSuffix.str();
4190-
mlir::Operation &op = builder.getInsertionBlock()->back();
4191-
if (op.hasAttr(mlir::acc::getDeclareActionAttrName())) {
4192-
auto attr = op.getAttrOfType<mlir::acc::DeclareActionAttr>(
4190+
mlir::Operation *op = &builder.getInsertionBlock()->back();
4191+
if (auto resOp = mlir::dyn_cast<fir::ResultOp>(*op)) {
4192+
assert(resOp.getOperands().size() == 0 &&
4193+
"expect only fir.result op with no operand");
4194+
op = op->getPrevNode();
4195+
}
4196+
assert(op && "expect operation to attach the post deallocation action");
4197+
if (op->hasAttr(mlir::acc::getDeclareActionAttrName())) {
4198+
auto attr = op->getAttrOfType<mlir::acc::DeclareActionAttr>(
41934199
mlir::acc::getDeclareActionAttrName());
4194-
op.setAttr(mlir::acc::getDeclareActionAttrName(),
4195-
mlir::acc::DeclareActionAttr::get(
4196-
builder.getContext(), attr.getPreAlloc(),
4197-
attr.getPostAlloc(), attr.getPreDealloc(),
4198-
/*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
4200+
op->setAttr(mlir::acc::getDeclareActionAttrName(),
4201+
mlir::acc::DeclareActionAttr::get(
4202+
builder.getContext(), attr.getPreAlloc(),
4203+
attr.getPostAlloc(), attr.getPreDealloc(),
4204+
/*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
41994205
} else {
4200-
op.setAttr(mlir::acc::getDeclareActionAttrName(),
4201-
mlir::acc::DeclareActionAttr::get(
4202-
builder.getContext(),
4203-
/*preAlloc=*/{}, /*postAlloc=*/{}, /*preDealloc=*/{},
4204-
/*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
4206+
op->setAttr(mlir::acc::getDeclareActionAttrName(),
4207+
mlir::acc::DeclareActionAttr::get(
4208+
builder.getContext(),
4209+
/*preAlloc=*/{}, /*postAlloc=*/{}, /*preDealloc=*/{},
4210+
/*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
42054211
}
42064212
}
42074213

flang/test/Lower/CUDA/cuda-allocatable.cuf

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@ end subroutine
1717

1818
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
1919

20+
! CHECK: %[[BOX_LOAD:.*]] = fir.load %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
21+
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[BOX_LOAD]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
22+
! CHECK: %[[ADDR_I64:.*]] = fir.convert %[[ADDR]] : (!fir.heap<!fir.array<?xf32>>) -> i64
23+
! CHECK: %[[C0:.*]] = arith.constant 0 : i64
24+
! CHECK: %[[NE_C0:.*]] = arith.cmpi ne, %[[ADDR_I64]], %[[C0]] : i64
25+
! CHECK: fir.if %[[NE_C0]] {
26+
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
27+
! CHECK: }
28+
2029
subroutine sub2()
2130
real, allocatable, managed :: a(:)
2231
integer :: istat
@@ -37,6 +46,10 @@ end subroutine
3746
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
3847
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
3948

49+
! CHECK: fir.if %{{.*}} {
50+
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>} -> i32
51+
! CHECK: }
52+
4053
subroutine sub3()
4154
integer, allocatable, pinned :: a(:,:)
4255
logical :: plog
@@ -50,6 +63,9 @@ end subroutine
5063
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
5164
! CHECK-2: fir.call @_FortranAAllocatableSetBounds
5265
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {cuda_attr = #fir.cuda<pinned>} -> i32
66+
! CHECK: fir.if %{{.*}} {
67+
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {cuda_attr = #fir.cuda<pinned>} -> i32
68+
! CHECK: }
5369

5470
subroutine sub4()
5571
real, allocatable, device :: a(:)
@@ -65,6 +81,9 @@ end subroutine
6581
! CHECK: fir.call @_FortranAAllocatableSetBounds
6682
! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref<i32>
6783
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda<device>} -> i32
84+
! CHECK: fir.if %{{.*}} {
85+
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
86+
! CHECK: }
6887

6988
subroutine sub5()
7089
real, allocatable, device :: a(:)
@@ -80,6 +99,11 @@ end subroutine
8099
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
81100
! CHECK: fir.call @_FortranAAllocatableSetBounds
82101
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {cuda_attr = #fir.cuda<device>} -> i32
102+
! CHECK: fir.if
103+
! CHECK: fir.freemem
104+
! CHECK: fir.if %{{.*}} {
105+
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
106+
! CHECK: }
83107

84108
subroutine sub6()
85109
real, allocatable, device :: a(:)
@@ -95,6 +119,9 @@ end subroutine
95119
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
96120
! CHECK: fir.call @_FortranAAllocatableApplyMold
97121
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
122+
! CHECK: fir.if %{{.*}} {
123+
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
124+
! CHECK: }
98125

99126
subroutine sub7()
100127
real, allocatable, device :: a(:)
@@ -120,3 +147,6 @@ end subroutine
120147
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
121148
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
122149
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
150+
! CHECK: fir.if %{{.*}} {
151+
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
152+
! CHECK: }

flang/test/Lower/OpenACC/acc-declare.f90

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,11 @@ subroutine acc_declare_allocate()
245245
! CHECK: fir.freemem %{{.*}} : !fir.heap<!fir.array<?xi32>>
246246
! CHECK: fir.store %{{.*}} to %{{.*}} {acc.declare_action = #acc.declare_action<postDealloc = @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_dealloc>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
247247

248+
! CHECK: fir.if
249+
! CHECK: fir.freemem %{{.*}} : !fir.heap<!fir.array<?xi32>>
250+
! CHECK: fir.store %{{.*}} to %{{.*}}#1 {acc.declare_action = #acc.declare_action<postDealloc = @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_dealloc>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
251+
! CHECK: }
252+
248253
end subroutine
249254

250255
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(

0 commit comments

Comments
 (0)