Skip to content

[flang][cuda] Set the allocator on fir.embox operation #101722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 2, 2024

Conversation

clementval
Copy link
Contributor

This patch set the allocator_idx attribute for allocatable descriptor that have specific CUDA attribute.

@llvmbot llvmbot added flang:runtime flang Flang issues not falling into any other category flang:fir-hlfir labels Aug 2, 2024
@clementval clementval requested review from wangzpgi and vzakhari August 2, 2024 17:35
@llvmbot
Copy link
Member

llvmbot commented Aug 2, 2024

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

This patch set the allocator_idx attribute for allocatable descriptor that have specific CUDA attribute.


Full diff: https://github.com/llvm/llvm-project/pull/101722.diff

10 Files Affected:

  • (modified) flang/include/flang/Lower/Allocatable.h (+3-1)
  • (modified) flang/include/flang/Optimizer/Builder/MutableBox.h (+5-2)
  • (modified) flang/include/flang/Runtime/CUDA/allocator.h (-4)
  • (modified) flang/include/flang/Runtime/allocator-registry.h (+5)
  • (modified) flang/lib/Lower/Allocatable.cpp (+3-3)
  • (modified) flang/lib/Lower/ConvertVariable.cpp (+18-1)
  • (modified) flang/lib/Optimizer/Builder/MutableBox.cpp (+15-7)
  • (modified) flang/runtime/CUDA/allocator.cpp (+1-1)
  • (modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+7)
  • (modified) flang/unittests/Runtime/CUDA/AllocatorCUF.cpp (+1)
diff --git a/flang/include/flang/Lower/Allocatable.h b/flang/include/flang/Lower/Allocatable.h
index e8738f0407e77..1209b157ed1f4 100644
--- a/flang/include/flang/Lower/Allocatable.h
+++ b/flang/include/flang/Lower/Allocatable.h
@@ -15,6 +15,7 @@
 
 #include "flang/Lower/AbstractConverter.h"
 #include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Runtime/allocator-registry.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace mlir {
@@ -70,7 +71,8 @@ void genDeallocateIfAllocated(AbstractConverter &converter,
 fir::MutableBoxValue
 createMutableBox(AbstractConverter &converter, mlir::Location loc,
                  const pft::Variable &var, mlir::Value boxAddr,
-                 mlir::ValueRange nonDeferredParams, bool alwaysUseBox);
+                 mlir::ValueRange nonDeferredParams, bool alwaysUseBox,
+                 unsigned allocator = kDefaultAllocator);
 
 /// Assign a boxed value to a boxed variable, \p box (known as a
 /// MutableBoxValue). Expression \p source will be lowered to build the
diff --git a/flang/include/flang/Optimizer/Builder/MutableBox.h b/flang/include/flang/Optimizer/Builder/MutableBox.h
index 0573d2b238c20..fea7c7204837b 100644
--- a/flang/include/flang/Optimizer/Builder/MutableBox.h
+++ b/flang/include/flang/Optimizer/Builder/MutableBox.h
@@ -14,6 +14,7 @@
 #define FORTRAN_OPTIMIZER_BUILDER_MUTABLEBOX_H
 
 #include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Runtime/allocator-registry.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace mlir {
@@ -43,7 +44,8 @@ namespace fir::factory {
 mlir::Value createUnallocatedBox(fir::FirOpBuilder &builder, mlir::Location loc,
                                  mlir::Type boxType,
                                  mlir::ValueRange nonDeferredParams,
-                                 mlir::Value typeSourceBox = {});
+                                 mlir::Value typeSourceBox = {},
+                                 unsigned allocator = kDefaultAllocator);
 
 /// Create a MutableBoxValue for a temporary allocatable.
 /// The created MutableBoxValue wraps a fir.ref<fir.box<fir.heap<type>>> and is
@@ -80,7 +82,8 @@ void associateMutableBoxWithRemap(fir::FirOpBuilder &builder,
 /// address field of the MutableBoxValue to zero.
 void disassociateMutableBox(fir::FirOpBuilder &builder, mlir::Location loc,
                             const fir::MutableBoxValue &box,
-                            bool polymorphicSetType = true);
+                            bool polymorphicSetType = true,
+                            unsigned allocator = kDefaultAllocator);
 
 /// Generate code to conditionally reallocate a MutableBoxValue with a new
 /// shape, lower bounds, and LEN parameters if it is unallocated or if its
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 9f6fb55bea744..46ff5dbe2f385 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -11,10 +11,6 @@
 
 #include "flang/Runtime/descriptor.h"
 
-static constexpr unsigned kPinnedAllocatorPos = 1;
-static constexpr unsigned kDeviceAllocatorPos = 2;
-static constexpr unsigned kManagedAllocatorPos = 3;
-
 #define CUDA_REPORT_IF_ERROR(expr) \
   [](CUresult result) { \
     if (!result) \
diff --git a/flang/include/flang/Runtime/allocator-registry.h b/flang/include/flang/Runtime/allocator-registry.h
index c481bec8e8e51..209b4d2e44e9b 100644
--- a/flang/include/flang/Runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -15,6 +15,11 @@
 
 static constexpr unsigned kDefaultAllocator = 0;
 
+// Allocator used for CUF
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
 #define MAX_ALLOCATOR 5
 
 namespace Fortran::runtime {
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index f40c15db93db8..d4d999f5c84a0 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -1052,15 +1052,15 @@ createMutableProperties(Fortran::lower::AbstractConverter &converter,
 fir::MutableBoxValue Fortran::lower::createMutableBox(
     Fortran::lower::AbstractConverter &converter, mlir::Location loc,
     const Fortran::lower::pft::Variable &var, mlir::Value boxAddr,
-    mlir::ValueRange nonDeferredParams, bool alwaysUseBox) {
-
+    mlir::ValueRange nonDeferredParams, bool alwaysUseBox, unsigned allocator) {
   fir::MutableProperties mutableProperties = createMutableProperties(
       converter, loc, var, nonDeferredParams, alwaysUseBox);
   fir::MutableBoxValue box(boxAddr, nonDeferredParams, mutableProperties);
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   if (!var.isGlobal() && !Fortran::semantics::IsDummy(var.getSymbol()))
     fir::factory::disassociateMutableBox(builder, loc, box,
-                                         /*polymorphicSetType=*/false);
+                                         /*polymorphicSetType=*/false,
+                                         allocator);
   return box;
 }
 
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 4fcfa0b126e04..45389091b8164 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -39,6 +39,7 @@
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/Utils.h"
+#include "flang/Runtime/allocator-registry.h"
 #include "flang/Semantics/runtime-type-info.h"
 #include "flang/Semantics/tools.h"
 #include "llvm/Support/CommandLine.h"
@@ -1851,6 +1852,21 @@ static void genBoxDeclare(Fortran::lower::AbstractConverter &converter,
                       replace);
 }
 
+static unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
+  std::optional<Fortran::common::CUDADataAttr> cudaAttr =
+      Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
+  if (cudaAttr) {
+    if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned)
+      return kPinnedAllocatorPos;
+    if (*cudaAttr == Fortran::common::CUDADataAttr::Device)
+      return kDeviceAllocatorPos;
+    if (*cudaAttr == Fortran::common::CUDADataAttr::Managed ||
+        *cudaAttr == Fortran::common::CUDADataAttr::Unified)
+      return kManagedAllocatorPos;
+  }
+  return kDefaultAllocator;
+}
+
 /// Lower specification expressions and attributes of variable \p var and
 /// add it to the symbol map. For a global or an alias, the address must be
 /// pre-computed and provided in \p preAlloc. A dummy argument for the current
@@ -1940,7 +1956,8 @@ void Fortran::lower::mapSymbolAttributes(
     fir::MutableBoxValue box = Fortran::lower::createMutableBox(
         converter, loc, var, boxAlloc, nonDeferredLenParams,
         /*alwaysUseBox=*/
-        converter.getLoweringOptions().getLowerToHighLevelFIR());
+        converter.getLoweringOptions().getLowerToHighLevelFIR(),
+        getAllocatorIdx(var.getSymbol()));
     genAllocatableOrPointerDeclare(converter, symMap, var.getSymbol(), box,
                                    replace);
     return;
diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp
index fcb9ddcf41386..aeb737acbf567 100644
--- a/flang/lib/Optimizer/Builder/MutableBox.cpp
+++ b/flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -199,8 +199,9 @@ class MutablePropertyWriter {
 public:
   MutablePropertyWriter(fir::FirOpBuilder &builder, mlir::Location loc,
                         const fir::MutableBoxValue &box,
-                        mlir::Value typeSourceBox = {})
-      : builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox} {}
+                        mlir::Value typeSourceBox = {}, unsigned allocator = 0)
+      : builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox},
+        allocator{allocator} {}
   /// Update MutableBoxValue with new address, shape and length parameters.
   /// Extents and lbounds must all have index type.
   /// lbounds can be empty in which case all ones is assumed.
@@ -242,7 +243,7 @@ class MutablePropertyWriter {
       // declared type, not retain the previous dynamic type.
       auto deallocatedBox = fir::factory::createUnallocatedBox(
           builder, loc, box.getBoxTy(), box.nonDeferredLenParams(),
-          typeSourceBox);
+          typeSourceBox, allocator);
       builder.create<fir::StoreOp>(loc, deallocatedBox, box.getAddr());
     }
   }
@@ -276,7 +277,8 @@ class MutablePropertyWriter {
   /// Update the IR box (fir.ref<fir.box<T>>) of the MutableBoxValue.
   void updateIRBox(mlir::Value addr, mlir::ValueRange lbounds,
                    mlir::ValueRange extents, mlir::ValueRange lengths,
-                   mlir::Value tdesc = {}) {
+                   mlir::Value tdesc = {},
+                   unsigned allocator = kDefaultAllocator) {
     mlir::Value irBox = createNewFirBox(builder, loc, box, addr, lbounds,
                                         extents, lengths, tdesc);
     builder.create<fir::StoreOp>(loc, irBox, box.getAddr());
@@ -322,13 +324,15 @@ class MutablePropertyWriter {
   mlir::Location loc;
   fir::MutableBoxValue box;
   mlir::Value typeSourceBox;
+  unsigned allocator;
 };
 
 } // namespace
 
 mlir::Value fir::factory::createUnallocatedBox(
     fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type boxType,
-    mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox) {
+    mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox,
+    unsigned allocator) {
   auto baseBoxType = mlir::cast<fir::BaseBoxType>(boxType);
   // Giving unallocated/disassociated status to assumed-rank POINTER/
   // ALLOCATABLE is not directly possible to a Fortran user. But the
@@ -374,6 +378,8 @@ mlir::Value fir::factory::createUnallocatedBox(
   mlir::Value emptySlice;
   auto embox = builder.create<fir::EmboxOp>(
       loc, baseBoxType, nullAddr, shape, emptySlice, lenParams, typeSourceBox);
+  if (allocator != 0)
+    embox.setAllocatorIdx(allocator);
   if (isAssumedRank)
     return builder.createConvert(loc, boxType, embox);
   return embox;
@@ -691,7 +697,8 @@ void fir::factory::associateMutableBoxWithRemap(
 void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
                                           mlir::Location loc,
                                           const fir::MutableBoxValue &box,
-                                          bool polymorphicSetType) {
+                                          bool polymorphicSetType,
+                                          unsigned allocator) {
   if (box.isPolymorphic() && polymorphicSetType) {
     // 7.3.2.3 point 7. The dynamic type of a disassociated pointer is the
     // same as its declared type.
@@ -704,7 +711,8 @@ void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
       return;
     }
   }
-  MutablePropertyWriter{builder, loc, box}.setUnallocatedStatus();
+  MutablePropertyWriter{builder, loc, box, {}, allocator}
+      .setUnallocatedStatus();
 }
 
 static llvm::SmallVector<mlir::Value>
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index 02eaba5636990..26a3c29696269 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Runtime/CUDA/allocator.h"
-#include "../allocator-registry.h"
 #include "../derived.h"
 #include "../stat.h"
 #include "../terminator.h"
 #include "../type-info.h"
 #include "flang/Common/Fortran.h"
 #include "flang/ISO_Fortran_binding_wrapper.h"
+#include "flang/Runtime/allocator-registry.h"
 
 #include "cuda.h"
 
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 82c1063507535..cb6ca9af334fc 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -11,6 +11,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub1()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
@@ -37,6 +38,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub2()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub2Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
 ! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -60,6 +62,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub3()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub3Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
 ! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
 ! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
@@ -78,6 +81,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub4()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
 ! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -97,6 +101,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub5()
 ! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub5Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
 ! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -118,6 +123,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub6()
 ! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub6Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
 ! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -140,6 +146,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub7()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub7Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
 ! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 2a7c7fe25de85..f372ae18c202f 100644
--- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -11,6 +11,7 @@
 #include "flang/Common/Fortran.h"
 #include "flang/Runtime/CUDA/allocator.h"
 #include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
 
 #include "cuda.h"
 

@llvmbot
Copy link
Member

llvmbot commented Aug 2, 2024

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

This patch set the allocator_idx attribute for allocatable descriptor that have specific CUDA attribute.


Full diff: https://github.com/llvm/llvm-project/pull/101722.diff

10 Files Affected:

  • (modified) flang/include/flang/Lower/Allocatable.h (+3-1)
  • (modified) flang/include/flang/Optimizer/Builder/MutableBox.h (+5-2)
  • (modified) flang/include/flang/Runtime/CUDA/allocator.h (-4)
  • (modified) flang/include/flang/Runtime/allocator-registry.h (+5)
  • (modified) flang/lib/Lower/Allocatable.cpp (+3-3)
  • (modified) flang/lib/Lower/ConvertVariable.cpp (+18-1)
  • (modified) flang/lib/Optimizer/Builder/MutableBox.cpp (+15-7)
  • (modified) flang/runtime/CUDA/allocator.cpp (+1-1)
  • (modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+7)
  • (modified) flang/unittests/Runtime/CUDA/AllocatorCUF.cpp (+1)
diff --git a/flang/include/flang/Lower/Allocatable.h b/flang/include/flang/Lower/Allocatable.h
index e8738f0407e77..1209b157ed1f4 100644
--- a/flang/include/flang/Lower/Allocatable.h
+++ b/flang/include/flang/Lower/Allocatable.h
@@ -15,6 +15,7 @@
 
 #include "flang/Lower/AbstractConverter.h"
 #include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Runtime/allocator-registry.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace mlir {
@@ -70,7 +71,8 @@ void genDeallocateIfAllocated(AbstractConverter &converter,
 fir::MutableBoxValue
 createMutableBox(AbstractConverter &converter, mlir::Location loc,
                  const pft::Variable &var, mlir::Value boxAddr,
-                 mlir::ValueRange nonDeferredParams, bool alwaysUseBox);
+                 mlir::ValueRange nonDeferredParams, bool alwaysUseBox,
+                 unsigned allocator = kDefaultAllocator);
 
 /// Assign a boxed value to a boxed variable, \p box (known as a
 /// MutableBoxValue). Expression \p source will be lowered to build the
diff --git a/flang/include/flang/Optimizer/Builder/MutableBox.h b/flang/include/flang/Optimizer/Builder/MutableBox.h
index 0573d2b238c20..fea7c7204837b 100644
--- a/flang/include/flang/Optimizer/Builder/MutableBox.h
+++ b/flang/include/flang/Optimizer/Builder/MutableBox.h
@@ -14,6 +14,7 @@
 #define FORTRAN_OPTIMIZER_BUILDER_MUTABLEBOX_H
 
 #include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Runtime/allocator-registry.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace mlir {
@@ -43,7 +44,8 @@ namespace fir::factory {
 mlir::Value createUnallocatedBox(fir::FirOpBuilder &builder, mlir::Location loc,
                                  mlir::Type boxType,
                                  mlir::ValueRange nonDeferredParams,
-                                 mlir::Value typeSourceBox = {});
+                                 mlir::Value typeSourceBox = {},
+                                 unsigned allocator = kDefaultAllocator);
 
 /// Create a MutableBoxValue for a temporary allocatable.
 /// The created MutableBoxValue wraps a fir.ref<fir.box<fir.heap<type>>> and is
@@ -80,7 +82,8 @@ void associateMutableBoxWithRemap(fir::FirOpBuilder &builder,
 /// address field of the MutableBoxValue to zero.
 void disassociateMutableBox(fir::FirOpBuilder &builder, mlir::Location loc,
                             const fir::MutableBoxValue &box,
-                            bool polymorphicSetType = true);
+                            bool polymorphicSetType = true,
+                            unsigned allocator = kDefaultAllocator);
 
 /// Generate code to conditionally reallocate a MutableBoxValue with a new
 /// shape, lower bounds, and LEN parameters if it is unallocated or if its
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 9f6fb55bea744..46ff5dbe2f385 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -11,10 +11,6 @@
 
 #include "flang/Runtime/descriptor.h"
 
-static constexpr unsigned kPinnedAllocatorPos = 1;
-static constexpr unsigned kDeviceAllocatorPos = 2;
-static constexpr unsigned kManagedAllocatorPos = 3;
-
 #define CUDA_REPORT_IF_ERROR(expr) \
   [](CUresult result) { \
     if (!result) \
diff --git a/flang/include/flang/Runtime/allocator-registry.h b/flang/include/flang/Runtime/allocator-registry.h
index c481bec8e8e51..209b4d2e44e9b 100644
--- a/flang/include/flang/Runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -15,6 +15,11 @@
 
 static constexpr unsigned kDefaultAllocator = 0;
 
+// Allocator used for CUF
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
 #define MAX_ALLOCATOR 5
 
 namespace Fortran::runtime {
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index f40c15db93db8..d4d999f5c84a0 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -1052,15 +1052,15 @@ createMutableProperties(Fortran::lower::AbstractConverter &converter,
 fir::MutableBoxValue Fortran::lower::createMutableBox(
     Fortran::lower::AbstractConverter &converter, mlir::Location loc,
     const Fortran::lower::pft::Variable &var, mlir::Value boxAddr,
-    mlir::ValueRange nonDeferredParams, bool alwaysUseBox) {
-
+    mlir::ValueRange nonDeferredParams, bool alwaysUseBox, unsigned allocator) {
   fir::MutableProperties mutableProperties = createMutableProperties(
       converter, loc, var, nonDeferredParams, alwaysUseBox);
   fir::MutableBoxValue box(boxAddr, nonDeferredParams, mutableProperties);
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   if (!var.isGlobal() && !Fortran::semantics::IsDummy(var.getSymbol()))
     fir::factory::disassociateMutableBox(builder, loc, box,
-                                         /*polymorphicSetType=*/false);
+                                         /*polymorphicSetType=*/false,
+                                         allocator);
   return box;
 }
 
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 4fcfa0b126e04..45389091b8164 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -39,6 +39,7 @@
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/Utils.h"
+#include "flang/Runtime/allocator-registry.h"
 #include "flang/Semantics/runtime-type-info.h"
 #include "flang/Semantics/tools.h"
 #include "llvm/Support/CommandLine.h"
@@ -1851,6 +1852,21 @@ static void genBoxDeclare(Fortran::lower::AbstractConverter &converter,
                       replace);
 }
 
+static unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
+  std::optional<Fortran::common::CUDADataAttr> cudaAttr =
+      Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
+  if (cudaAttr) {
+    if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned)
+      return kPinnedAllocatorPos;
+    if (*cudaAttr == Fortran::common::CUDADataAttr::Device)
+      return kDeviceAllocatorPos;
+    if (*cudaAttr == Fortran::common::CUDADataAttr::Managed ||
+        *cudaAttr == Fortran::common::CUDADataAttr::Unified)
+      return kManagedAllocatorPos;
+  }
+  return kDefaultAllocator;
+}
+
 /// Lower specification expressions and attributes of variable \p var and
 /// add it to the symbol map. For a global or an alias, the address must be
 /// pre-computed and provided in \p preAlloc. A dummy argument for the current
@@ -1940,7 +1956,8 @@ void Fortran::lower::mapSymbolAttributes(
     fir::MutableBoxValue box = Fortran::lower::createMutableBox(
         converter, loc, var, boxAlloc, nonDeferredLenParams,
         /*alwaysUseBox=*/
-        converter.getLoweringOptions().getLowerToHighLevelFIR());
+        converter.getLoweringOptions().getLowerToHighLevelFIR(),
+        getAllocatorIdx(var.getSymbol()));
     genAllocatableOrPointerDeclare(converter, symMap, var.getSymbol(), box,
                                    replace);
     return;
diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp
index fcb9ddcf41386..aeb737acbf567 100644
--- a/flang/lib/Optimizer/Builder/MutableBox.cpp
+++ b/flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -199,8 +199,9 @@ class MutablePropertyWriter {
 public:
   MutablePropertyWriter(fir::FirOpBuilder &builder, mlir::Location loc,
                         const fir::MutableBoxValue &box,
-                        mlir::Value typeSourceBox = {})
-      : builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox} {}
+                        mlir::Value typeSourceBox = {}, unsigned allocator = 0)
+      : builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox},
+        allocator{allocator} {}
   /// Update MutableBoxValue with new address, shape and length parameters.
   /// Extents and lbounds must all have index type.
   /// lbounds can be empty in which case all ones is assumed.
@@ -242,7 +243,7 @@ class MutablePropertyWriter {
       // declared type, not retain the previous dynamic type.
       auto deallocatedBox = fir::factory::createUnallocatedBox(
           builder, loc, box.getBoxTy(), box.nonDeferredLenParams(),
-          typeSourceBox);
+          typeSourceBox, allocator);
       builder.create<fir::StoreOp>(loc, deallocatedBox, box.getAddr());
     }
   }
@@ -276,7 +277,8 @@ class MutablePropertyWriter {
   /// Update the IR box (fir.ref<fir.box<T>>) of the MutableBoxValue.
   void updateIRBox(mlir::Value addr, mlir::ValueRange lbounds,
                    mlir::ValueRange extents, mlir::ValueRange lengths,
-                   mlir::Value tdesc = {}) {
+                   mlir::Value tdesc = {},
+                   unsigned allocator = kDefaultAllocator) {
     mlir::Value irBox = createNewFirBox(builder, loc, box, addr, lbounds,
                                         extents, lengths, tdesc);
     builder.create<fir::StoreOp>(loc, irBox, box.getAddr());
@@ -322,13 +324,15 @@ class MutablePropertyWriter {
   mlir::Location loc;
   fir::MutableBoxValue box;
   mlir::Value typeSourceBox;
+  unsigned allocator;
 };
 
 } // namespace
 
 mlir::Value fir::factory::createUnallocatedBox(
     fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type boxType,
-    mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox) {
+    mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox,
+    unsigned allocator) {
   auto baseBoxType = mlir::cast<fir::BaseBoxType>(boxType);
   // Giving unallocated/disassociated status to assumed-rank POINTER/
   // ALLOCATABLE is not directly possible to a Fortran user. But the
@@ -374,6 +378,8 @@ mlir::Value fir::factory::createUnallocatedBox(
   mlir::Value emptySlice;
   auto embox = builder.create<fir::EmboxOp>(
       loc, baseBoxType, nullAddr, shape, emptySlice, lenParams, typeSourceBox);
+  if (allocator != 0)
+    embox.setAllocatorIdx(allocator);
   if (isAssumedRank)
     return builder.createConvert(loc, boxType, embox);
   return embox;
@@ -691,7 +697,8 @@ void fir::factory::associateMutableBoxWithRemap(
 void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
                                           mlir::Location loc,
                                           const fir::MutableBoxValue &box,
-                                          bool polymorphicSetType) {
+                                          bool polymorphicSetType,
+                                          unsigned allocator) {
   if (box.isPolymorphic() && polymorphicSetType) {
     // 7.3.2.3 point 7. The dynamic type of a disassociated pointer is the
     // same as its declared type.
@@ -704,7 +711,8 @@ void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
       return;
     }
   }
-  MutablePropertyWriter{builder, loc, box}.setUnallocatedStatus();
+  MutablePropertyWriter{builder, loc, box, {}, allocator}
+      .setUnallocatedStatus();
 }
 
 static llvm::SmallVector<mlir::Value>
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index 02eaba5636990..26a3c29696269 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Runtime/CUDA/allocator.h"
-#include "../allocator-registry.h"
 #include "../derived.h"
 #include "../stat.h"
 #include "../terminator.h"
 #include "../type-info.h"
 #include "flang/Common/Fortran.h"
 #include "flang/ISO_Fortran_binding_wrapper.h"
+#include "flang/Runtime/allocator-registry.h"
 
 #include "cuda.h"
 
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 82c1063507535..cb6ca9af334fc 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -11,6 +11,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub1()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: fir.call @_FortranAAllocatableSetBounds
 ! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
@@ -37,6 +38,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub2()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub2Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
 ! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -60,6 +62,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub3()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub3Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
 ! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
 ! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
@@ -78,6 +81,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub4()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
 ! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -97,6 +101,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub5()
 ! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub5Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
 ! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -118,6 +123,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub6()
 ! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub6Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
 ! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -140,6 +146,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPsub7()
 ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub7Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
 ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
 ! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
 ! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 2a7c7fe25de85..f372ae18c202f 100644
--- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -11,6 +11,7 @@
 #include "flang/Common/Fortran.h"
 #include "flang/Runtime/CUDA/allocator.h"
 #include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
 
 #include "cuda.h"
 

Copy link
Contributor

@vzakhari vzakhari left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@clementval clementval merged commit bbdb1e4 into llvm:main Aug 2, 2024
11 checks passed
@clementval clementval deleted the cuf_embox_allocator_idx branch August 2, 2024 21:00
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:fir-hlfir flang:runtime flang Flang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants