-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][cuda] Lower DEALLOCATE for device variables #89091
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: Replace the runtime call to `AllocatableDeallocate` for CUDA device variables with the newly added `fir.cuda_deallocate` operation. This is similar to #88980. A third patch will handle the case of automatic deallocation of device allocatable variables. Full diff: https://github.com/llvm/llvm-project/pull/89091.diff 2 Files Affected:
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 1d434d512d0c5c..38f61528d7e28a 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -799,6 +799,28 @@ static void postDeallocationAction(Fortran::lower::AbstractConverter &converter,
Fortran::lower::attachDeclarePostDeallocAction(converter, builder, sym);
}
+/// Generate a fir.cuda_deallocate operation for the CUDA variable \p sym
+/// described by \p box and return the operation's integer status result.
+///
+/// The ERRMSG operand is forwarded only when one was actually supplied: an
+/// errorManager.errMsgAddr produced by fir.absent is dropped. The hasStat
+/// unit attribute is set when errorManager reports a STAT= specifier.
+static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
+                                     mlir::Location loc,
+                                     const fir::MutableBoxValue &box,
+                                     ErrorManager &errorManager,
+                                     const Fortran::semantics::Symbol &sym) {
+  fir::CUDADataAttributeAttr cudaAttr =
+      Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
+                                                       sym);
+  // getDefiningOp<OpTy>() yields null both when the value has no defining
+  // operation (e.g. a block argument) and when it is defined by a different
+  // op, so unlike mlir::isa<> on the raw pointer it cannot assert on null.
+  mlir::Value errmsg = errorManager.errMsgAddr.getDefiningOp<fir::AbsentOp>()
+                           ? nullptr
+                           : errorManager.errMsgAddr;
+
+  // Keep return type the same as a standard AllocatableDeallocate call.
+  mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
+  return builder
+      .create<fir::CUDADeallocateOp>(
+          loc, retTy, box.getAddr(), errmsg, cudaAttr,
+          errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
+      .getResult();
+}
+
// Generate deallocation of a pointer/allocatable.
static mlir::Value
genDeallocate(fir::FirOpBuilder &builder,
@@ -806,10 +828,11 @@ genDeallocate(fir::FirOpBuilder &builder,
const fir::MutableBoxValue &box, ErrorManager &errorManager,
mlir::Value declaredTypeDesc = {},
const Fortran::semantics::Symbol *symbol = nullptr) {
+ bool isCudaSymbol = symbol && Fortran::semantics::HasCUDAAttr(*symbol);
// Deallocate intrinsic types inline.
if (!box.isDerived() && !box.isPolymorphic() &&
!box.isUnlimitedPolymorphic() && !errorManager.hasStatSpec() &&
- !useAllocateRuntime && !box.isPointer()) {
+ !useAllocateRuntime && !box.isPointer() && !isCudaSymbol) {
// Pointers must use PointerDeallocate so that their deallocations
// can be validated.
mlir::Value ret = fir::factory::genFreemem(builder, loc, box);
@@ -820,8 +843,12 @@ genDeallocate(fir::FirOpBuilder &builder,
// Use runtime calls to deallocate descriptor cases. Sync MutableBoxValue
// with its descriptor before and after calls if needed.
errorManager.genStatCheck(builder, loc);
- mlir::Value stat =
- genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
+ mlir::Value stat;
+ if (!isCudaSymbol)
+ stat =
+ genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
+ else
+ stat = genCudaDeallocate(builder, loc, box, errorManager, *symbol);
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
if (symbol)
postDeallocationAction(converter, builder, *symbol);
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 55223011e8d9e9..5b10334ecdbc14 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -5,6 +5,8 @@
subroutine sub1()
real, allocatable, device :: a(:)
allocate(a(10))
+
+ deallocate(a)
end subroutine
! CHECK-LABEL: func.func @_QPsub1()
@@ -13,10 +15,14 @@ end subroutine
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+
subroutine sub2()
real, allocatable, managed :: a(:)
integer :: istat
allocate(a(10), stat=istat)
+
+ deallocate(a, stat=istat)
end subroutine
! CHECK-LABEL: func.func @_QPsub2()
@@ -28,6 +34,9 @@ end subroutine
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
+! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+
subroutine sub3()
integer, allocatable, pinned :: a(:,:)
logical :: plog
@@ -92,6 +101,8 @@ subroutine sub7()
integer :: istat
character(50) :: err
allocate(a(100), stat=istat, errmsg=err)
+
+ deallocate(a, stat=istat, errmsg=err)
end subroutine
! CHECK-LABEL: func.func @_QPsub7()
@@ -105,3 +116,7 @@ end subroutine
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+
+! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
+! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you, Valentin!
Replace the runtime call to `AllocatableDeallocate` for CUDA device variables with the newly added `fir.cuda_deallocate` operation.
This is similar to #88980.
A third patch will handle the case of automatic deallocation of device allocatable variables.