Skip to content

Commit 9435edf

Browse files
authored
[flang][cuda] Lower DEALLOCATE for device variables (#89091)
Replace the runtime call to `AllocatableDeallocate` for CUDA device variables with the newly added `fir.cuda_deallocate` operation. This is similar to #88980. A third patch will handle the case of automatic deallocation of device allocatable variables.
1 parent 0cee894 commit 9435edf

File tree

2 files changed

+45
-3
lines changed

2 files changed

+45
-3
lines changed

flang/lib/Lower/Allocatable.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -799,17 +799,40 @@ static void postDeallocationAction(Fortran::lower::AbstractConverter &converter,
799799
Fortran::lower::attachDeclarePostDeallocAction(converter, builder, sym);
800800
}
801801

802+
/// Emit a `fir.cuda_deallocate` operation for the CUDA data object \p sym
/// whose descriptor is held by \p box, and return the operation's result.
///
/// The ERRMSG operand is passed as a null value when the error manager's
/// message address is defined by a `fir.absent` operation. The `hasStat`
/// unit attribute is attached only when the error manager reports a STAT=
/// specifier.
static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
                                     mlir::Location loc,
                                     const fir::MutableBoxValue &box,
                                     ErrorManager &errorManager,
                                     const Fortran::semantics::Symbol &sym) {
  // Only forward ERRMSG when it is actually present; an absent ERRMSG is
  // represented by leaving the operand null.
  mlir::Value errMsgOperand;
  if (!mlir::isa<fir::AbsentOp>(errorManager.errMsgAddr.getDefiningOp()))
    errMsgOperand = errorManager.errMsgAddr;

  // CUDA data attribute of the symbol, recorded on the operation.
  fir::CUDADataAttributeAttr dataAttr =
      Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
                                                       sym);

  // Use the same `int` result type as the runtime deallocation call that this
  // operation stands in for (AllocatableDeallocate).
  mlir::Type statTy = fir::runtime::getModel<int>()(builder.getContext());

  // Null attribute means "no STAT= specifier".
  auto hasStatAttr =
      errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr;

  auto deallocOp = builder.create<fir::CUDADeallocateOp>(
      loc, statTy, box.getAddr(), errMsgOperand, dataAttr, hasStatAttr);
  return deallocOp.getResult();
}
823+
802824
// Generate deallocation of a pointer/allocatable.
803825
static mlir::Value
804826
genDeallocate(fir::FirOpBuilder &builder,
805827
Fortran::lower::AbstractConverter &converter, mlir::Location loc,
806828
const fir::MutableBoxValue &box, ErrorManager &errorManager,
807829
mlir::Value declaredTypeDesc = {},
808830
const Fortran::semantics::Symbol *symbol = nullptr) {
831+
bool isCudaSymbol = symbol && Fortran::semantics::HasCUDAAttr(*symbol);
809832
// Deallocate intrinsic types inline.
810833
if (!box.isDerived() && !box.isPolymorphic() &&
811834
!box.isUnlimitedPolymorphic() && !errorManager.hasStatSpec() &&
812-
!useAllocateRuntime && !box.isPointer()) {
835+
!useAllocateRuntime && !box.isPointer() && !isCudaSymbol) {
813836
// Pointers must use PointerDeallocate so that their deallocations
814837
// can be validated.
815838
mlir::Value ret = fir::factory::genFreemem(builder, loc, box);
@@ -820,8 +843,12 @@ genDeallocate(fir::FirOpBuilder &builder,
820843
// Use runtime calls to deallocate descriptor cases. Sync MutableBoxValue
821844
// with its descriptor before and after calls if needed.
822845
errorManager.genStatCheck(builder, loc);
823-
mlir::Value stat =
824-
genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
846+
mlir::Value stat;
847+
if (!isCudaSymbol)
848+
stat =
849+
genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
850+
else
851+
stat = genCudaDeallocate(builder, loc, box, errorManager, *symbol);
825852
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
826853
if (symbol)
827854
postDeallocationAction(converter, builder, *symbol);

flang/test/Lower/CUDA/cuda-allocatable.cuf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
! Test input: allocates and then explicitly deallocates a DEVICE allocatable
! array without STAT= or ERRMSG=. The expectations that follow look for
! fir.cuda_allocate and fir.cuda_deallocate carrying
! cuda_attr = #fir.cuda<device>.
subroutine sub1()
66
real, allocatable, device :: a(:)
77
allocate(a(10))
8+
9+
deallocate(a)
810
end subroutine
911

1012
! CHECK-LABEL: func.func @_QPsub1()
@@ -13,10 +15,14 @@ end subroutine
1315
! CHECK: fir.call @_FortranAAllocatableSetBounds
1416
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
1517

18+
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
19+
1620
! Test input: allocates and then deallocates a MANAGED allocatable array, both
! with STAT=istat. The expectations that follow look for the hasStat attribute
! on fir.cuda_allocate / fir.cuda_deallocate and a store of the returned
! status into istat.
subroutine sub2()
1721
real, allocatable, managed :: a(:)
1822
integer :: istat
1923
allocate(a(10), stat=istat)
24+
25+
deallocate(a, stat=istat)
2026
end subroutine
2127

2228
! CHECK-LABEL: func.func @_QPsub2()
@@ -28,6 +34,9 @@ end subroutine
2834
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
2935
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
3036

37+
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
38+
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
39+
3140
subroutine sub3()
3241
integer, allocatable, pinned :: a(:,:)
3342
logical :: plog
@@ -92,6 +101,8 @@ subroutine sub7()
92101
integer :: istat
93102
character(50) :: err
94103
allocate(a(100), stat=istat, errmsg=err)
104+
105+
deallocate(a, stat=istat, errmsg=err)
95106
end subroutine
96107

97108
! CHECK-LABEL: func.func @_QPsub7()
@@ -105,3 +116,7 @@ end subroutine
105116
! CHECK: fir.call @_FortranAAllocatableSetBounds
106117
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
107118
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
119+
120+
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
121+
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
122+
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>

0 commit comments

Comments
 (0)