[flang][cuda] Lower DEALLOCATE for device variables #89091

clementval · 2024-04-17T15:46:20Z

Replace the runtime call to AllocatableDeallocate for CUDA device variables with the newly added fir.cuda_deallocate operation.

This is similar to #88980.

A third patch will handle the case of automatic deallocation of device allocatable variables.

llvmbot · 2024-04-17T15:46:53Z

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタインクレメン) (clementval)

Changes

Replace the runtime call to AllocatableDeallocate for CUDA device variables with the newly added fir.cuda_deallocate operation.

This is similar to #88980.

A third patch will handle the case of automatic deallocation of device allocatable variables.

Full diff: https://github.com/llvm/llvm-project/pull/89091.diff

2 Files Affected:

(modified) flang/lib/Lower/Allocatable.cpp (+30-3)
(modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+15)

diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 1d434d512d0c5c..38f61528d7e28a 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -799,6 +799,28 @@ static void postDeallocationAction(Fortran::lower::AbstractConverter &converter,
     Fortran::lower::attachDeclarePostDeallocAction(converter, builder, sym);
 }
 
+static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
+                                     mlir::Location loc,
+                                     const fir::MutableBoxValue &box,
+                                     ErrorManager &errorManager,
+                                     const Fortran::semantics::Symbol &sym) {
+  fir::CUDADataAttributeAttr cudaAttr =
+      Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
+                                                       sym);
+  mlir::Value errmsg =
+      mlir::isa<fir::AbsentOp>(errorManager.errMsgAddr.getDefiningOp())
+          ? nullptr
+          : errorManager.errMsgAddr;
+
+  // Keep return type the same as a standard AllocatableDeallocate call.
+  mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
+  return builder
+      .create<fir::CUDADeallocateOp>(
+          loc, retTy, box.getAddr(), errmsg, cudaAttr,
+          errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
+      .getResult();
+}
+
 // Generate deallocation of a pointer/allocatable.
 static mlir::Value
 genDeallocate(fir::FirOpBuilder &builder,
@@ -806,10 +828,11 @@ genDeallocate(fir::FirOpBuilder &builder,
               const fir::MutableBoxValue &box, ErrorManager &errorManager,
               mlir::Value declaredTypeDesc = {},
               const Fortran::semantics::Symbol *symbol = nullptr) {
+  bool isCudaSymbol = symbol && Fortran::semantics::HasCUDAAttr(*symbol);
   // Deallocate intrinsic types inline.
   if (!box.isDerived() && !box.isPolymorphic() &&
       !box.isUnlimitedPolymorphic() && !errorManager.hasStatSpec() &&
-      !useAllocateRuntime && !box.isPointer()) {
+      !useAllocateRuntime && !box.isPointer() && !isCudaSymbol) {
     // Pointers must use PointerDeallocate so that their deallocations
     // can be validated.
     mlir::Value ret = fir::factory::genFreemem(builder, loc, box);
@@ -820,8 +843,12 @@ genDeallocate(fir::FirOpBuilder &builder,
   // Use runtime calls to deallocate descriptor cases. Sync MutableBoxValue
   // with its descriptor before and after calls if needed.
   errorManager.genStatCheck(builder, loc);
-  mlir::Value stat =
-      genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
+  mlir::Value stat;
+  if (!isCudaSymbol)
+    stat =
+        genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
+  else
+    stat = genCudaDeallocate(builder, loc, box, errorManager, *symbol);
   fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
   if (symbol)
     postDeallocationAction(converter, builder, *symbol);
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 55223011e8d9e9..5b10334ecdbc14 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -5,6 +5,8 @@
 subroutine sub1()
   real, allocatable, device :: a(:)
   allocate(a(10))
+
+  deallocate(a)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub1()
@@ -13,10 +15,14 @@ end subroutine
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
 
+! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+
 subroutine sub2()
   real, allocatable, managed :: a(:)
   integer :: istat
   allocate(a(10), stat=istat)
+
+  deallocate(a, stat=istat)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub2()
@@ -28,6 +34,9 @@ end subroutine
 ! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
 
+! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
+! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+
 subroutine sub3()
   integer, allocatable, pinned :: a(:,:)
   logical :: plog
@@ -92,6 +101,8 @@ subroutine sub7()
   integer :: istat
   character(50) :: err
   allocate(a(100), stat=istat, errmsg=err)
+
+  deallocate(a, stat=istat, errmsg=err)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub7()
@@ -105,3 +116,7 @@ end subroutine
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
+
+! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
+! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>

vzakhari

Thank you, Valentin!

flang/lib/Lower/Allocatable.cpp

[flang][cuda] Lower DEALLOCATE for device variables

ce4712a

clementval requested review from jeanPerier and vzakhari April 17, 2024 15:46

llvmbot added flang Flang issues not falling into any other category flang:fir-hlfir labels Apr 17, 2024

clementval requested a review from wangzpgi April 17, 2024 16:01

vzakhari approved these changes Apr 17, 2024

View reviewed changes

flang/lib/Lower/Allocatable.cpp Show resolved Hide resolved

clementval merged commit 9435edf into llvm:main Apr 17, 2024

clementval deleted the cuda_deallocate_lower branch April 17, 2024 20:45

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[flang][cuda] Lower DEALLOCATE for device variables #89091

[flang][cuda] Lower DEALLOCATE for device variables #89091

Uh oh!

clementval commented Apr 17, 2024

Uh oh!

llvmbot commented Apr 17, 2024

Uh oh!

vzakhari left a comment

Uh oh!

Uh oh!

Uh oh!

[flang][cuda] Lower DEALLOCATE for device variables #89091

[flang][cuda] Lower DEALLOCATE for device variables #89091

Uh oh!

Conversation

clementval commented Apr 17, 2024

Uh oh!

llvmbot commented Apr 17, 2024

Uh oh!

vzakhari left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!