-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[flang][cuda] Set the allocator on fir.embox operation #101722
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-runtime Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesThis patch set the Full diff: https://github.com/llvm/llvm-project/pull/101722.diff 10 Files Affected:
diff --git a/flang/include/flang/Lower/Allocatable.h b/flang/include/flang/Lower/Allocatable.h
index e8738f0407e77..1209b157ed1f4 100644
--- a/flang/include/flang/Lower/Allocatable.h
+++ b/flang/include/flang/Lower/Allocatable.h
@@ -15,6 +15,7 @@
#include "flang/Lower/AbstractConverter.h"
#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Runtime/allocator-registry.h"
#include "llvm/ADT/StringRef.h"
namespace mlir {
@@ -70,7 +71,8 @@ void genDeallocateIfAllocated(AbstractConverter &converter,
fir::MutableBoxValue
createMutableBox(AbstractConverter &converter, mlir::Location loc,
const pft::Variable &var, mlir::Value boxAddr,
- mlir::ValueRange nonDeferredParams, bool alwaysUseBox);
+ mlir::ValueRange nonDeferredParams, bool alwaysUseBox,
+ unsigned allocator = kDefaultAllocator);
/// Assign a boxed value to a boxed variable, \p box (known as a
/// MutableBoxValue). Expression \p source will be lowered to build the
diff --git a/flang/include/flang/Optimizer/Builder/MutableBox.h b/flang/include/flang/Optimizer/Builder/MutableBox.h
index 0573d2b238c20..fea7c7204837b 100644
--- a/flang/include/flang/Optimizer/Builder/MutableBox.h
+++ b/flang/include/flang/Optimizer/Builder/MutableBox.h
@@ -14,6 +14,7 @@
#define FORTRAN_OPTIMIZER_BUILDER_MUTABLEBOX_H
#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Runtime/allocator-registry.h"
#include "llvm/ADT/StringRef.h"
namespace mlir {
@@ -43,7 +44,8 @@ namespace fir::factory {
mlir::Value createUnallocatedBox(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type boxType,
mlir::ValueRange nonDeferredParams,
- mlir::Value typeSourceBox = {});
+ mlir::Value typeSourceBox = {},
+ unsigned allocator = kDefaultAllocator);
/// Create a MutableBoxValue for a temporary allocatable.
/// The created MutableBoxValue wraps a fir.ref<fir.box<fir.heap<type>>> and is
@@ -80,7 +82,8 @@ void associateMutableBoxWithRemap(fir::FirOpBuilder &builder,
/// address field of the MutableBoxValue to zero.
void disassociateMutableBox(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
- bool polymorphicSetType = true);
+ bool polymorphicSetType = true,
+ unsigned allocator = kDefaultAllocator);
/// Generate code to conditionally reallocate a MutableBoxValue with a new
/// shape, lower bounds, and LEN parameters if it is unallocated or if its
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 9f6fb55bea744..46ff5dbe2f385 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -11,10 +11,6 @@
#include "flang/Runtime/descriptor.h"
-static constexpr unsigned kPinnedAllocatorPos = 1;
-static constexpr unsigned kDeviceAllocatorPos = 2;
-static constexpr unsigned kManagedAllocatorPos = 3;
-
#define CUDA_REPORT_IF_ERROR(expr) \
[](CUresult result) { \
if (!result) \
diff --git a/flang/include/flang/Runtime/allocator-registry.h b/flang/include/flang/Runtime/allocator-registry.h
index c481bec8e8e51..209b4d2e44e9b 100644
--- a/flang/include/flang/Runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -15,6 +15,11 @@
static constexpr unsigned kDefaultAllocator = 0;
+// Allocator used for CUF
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
#define MAX_ALLOCATOR 5
namespace Fortran::runtime {
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index f40c15db93db8..d4d999f5c84a0 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -1052,15 +1052,15 @@ createMutableProperties(Fortran::lower::AbstractConverter &converter,
fir::MutableBoxValue Fortran::lower::createMutableBox(
Fortran::lower::AbstractConverter &converter, mlir::Location loc,
const Fortran::lower::pft::Variable &var, mlir::Value boxAddr,
- mlir::ValueRange nonDeferredParams, bool alwaysUseBox) {
-
+ mlir::ValueRange nonDeferredParams, bool alwaysUseBox, unsigned allocator) {
fir::MutableProperties mutableProperties = createMutableProperties(
converter, loc, var, nonDeferredParams, alwaysUseBox);
fir::MutableBoxValue box(boxAddr, nonDeferredParams, mutableProperties);
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
if (!var.isGlobal() && !Fortran::semantics::IsDummy(var.getSymbol()))
fir::factory::disassociateMutableBox(builder, loc, box,
- /*polymorphicSetType=*/false);
+ /*polymorphicSetType=*/false,
+ allocator);
return box;
}
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 4fcfa0b126e04..45389091b8164 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -39,6 +39,7 @@
#include "flang/Optimizer/Support/FatalError.h"
#include "flang/Optimizer/Support/InternalNames.h"
#include "flang/Optimizer/Support/Utils.h"
+#include "flang/Runtime/allocator-registry.h"
#include "flang/Semantics/runtime-type-info.h"
#include "flang/Semantics/tools.h"
#include "llvm/Support/CommandLine.h"
@@ -1851,6 +1852,21 @@ static void genBoxDeclare(Fortran::lower::AbstractConverter &converter,
replace);
}
+static unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
+ std::optional<Fortran::common::CUDADataAttr> cudaAttr =
+ Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
+ if (cudaAttr) {
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned)
+ return kPinnedAllocatorPos;
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Device)
+ return kDeviceAllocatorPos;
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Managed ||
+ *cudaAttr == Fortran::common::CUDADataAttr::Unified)
+ return kManagedAllocatorPos;
+ }
+ return kDefaultAllocator;
+}
+
/// Lower specification expressions and attributes of variable \p var and
/// add it to the symbol map. For a global or an alias, the address must be
/// pre-computed and provided in \p preAlloc. A dummy argument for the current
@@ -1940,7 +1956,8 @@ void Fortran::lower::mapSymbolAttributes(
fir::MutableBoxValue box = Fortran::lower::createMutableBox(
converter, loc, var, boxAlloc, nonDeferredLenParams,
/*alwaysUseBox=*/
- converter.getLoweringOptions().getLowerToHighLevelFIR());
+ converter.getLoweringOptions().getLowerToHighLevelFIR(),
+ getAllocatorIdx(var.getSymbol()));
genAllocatableOrPointerDeclare(converter, symMap, var.getSymbol(), box,
replace);
return;
diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp
index fcb9ddcf41386..aeb737acbf567 100644
--- a/flang/lib/Optimizer/Builder/MutableBox.cpp
+++ b/flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -199,8 +199,9 @@ class MutablePropertyWriter {
public:
MutablePropertyWriter(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
- mlir::Value typeSourceBox = {})
- : builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox} {}
+ mlir::Value typeSourceBox = {}, unsigned allocator = 0)
+ : builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox},
+ allocator{allocator} {}
/// Update MutableBoxValue with new address, shape and length parameters.
/// Extents and lbounds must all have index type.
/// lbounds can be empty in which case all ones is assumed.
@@ -242,7 +243,7 @@ class MutablePropertyWriter {
// declared type, not retain the previous dynamic type.
auto deallocatedBox = fir::factory::createUnallocatedBox(
builder, loc, box.getBoxTy(), box.nonDeferredLenParams(),
- typeSourceBox);
+ typeSourceBox, allocator);
builder.create<fir::StoreOp>(loc, deallocatedBox, box.getAddr());
}
}
@@ -276,7 +277,8 @@ class MutablePropertyWriter {
/// Update the IR box (fir.ref<fir.box<T>>) of the MutableBoxValue.
void updateIRBox(mlir::Value addr, mlir::ValueRange lbounds,
mlir::ValueRange extents, mlir::ValueRange lengths,
- mlir::Value tdesc = {}) {
+ mlir::Value tdesc = {},
+ unsigned allocator = kDefaultAllocator) {
mlir::Value irBox = createNewFirBox(builder, loc, box, addr, lbounds,
extents, lengths, tdesc);
builder.create<fir::StoreOp>(loc, irBox, box.getAddr());
@@ -322,13 +324,15 @@ class MutablePropertyWriter {
mlir::Location loc;
fir::MutableBoxValue box;
mlir::Value typeSourceBox;
+ unsigned allocator;
};
} // namespace
mlir::Value fir::factory::createUnallocatedBox(
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type boxType,
- mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox) {
+ mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox,
+ unsigned allocator) {
auto baseBoxType = mlir::cast<fir::BaseBoxType>(boxType);
// Giving unallocated/disassociated status to assumed-rank POINTER/
// ALLOCATABLE is not directly possible to a Fortran user. But the
@@ -374,6 +378,8 @@ mlir::Value fir::factory::createUnallocatedBox(
mlir::Value emptySlice;
auto embox = builder.create<fir::EmboxOp>(
loc, baseBoxType, nullAddr, shape, emptySlice, lenParams, typeSourceBox);
+ if (allocator != 0)
+ embox.setAllocatorIdx(allocator);
if (isAssumedRank)
return builder.createConvert(loc, boxType, embox);
return embox;
@@ -691,7 +697,8 @@ void fir::factory::associateMutableBoxWithRemap(
void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
mlir::Location loc,
const fir::MutableBoxValue &box,
- bool polymorphicSetType) {
+ bool polymorphicSetType,
+ unsigned allocator) {
if (box.isPolymorphic() && polymorphicSetType) {
// 7.3.2.3 point 7. The dynamic type of a disassociated pointer is the
// same as its declared type.
@@ -704,7 +711,8 @@ void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
return;
}
}
- MutablePropertyWriter{builder, loc, box}.setUnallocatedStatus();
+ MutablePropertyWriter{builder, loc, box, {}, allocator}
+ .setUnallocatedStatus();
}
static llvm::SmallVector<mlir::Value>
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index 02eaba5636990..26a3c29696269 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -7,13 +7,13 @@
//===----------------------------------------------------------------------===//
#include "flang/Runtime/CUDA/allocator.h"
-#include "../allocator-registry.h"
#include "../derived.h"
#include "../stat.h"
#include "../terminator.h"
#include "../type-info.h"
#include "flang/Common/Fortran.h"
#include "flang/ISO_Fortran_binding_wrapper.h"
+#include "flang/Runtime/allocator-registry.h"
#include "cuda.h"
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 82c1063507535..cb6ca9af334fc 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -11,6 +11,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
@@ -37,6 +38,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub2()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub2Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -60,6 +62,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub3()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub3Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
@@ -78,6 +81,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub4()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -97,6 +101,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub5()
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub5Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -118,6 +123,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub6()
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub6Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -140,6 +146,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub7()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub7Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 2a7c7fe25de85..f372ae18c202f 100644
--- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -11,6 +11,7 @@
#include "flang/Common/Fortran.h"
#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
#include "cuda.h"
|
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesThis patch set the Full diff: https://github.com/llvm/llvm-project/pull/101722.diff 10 Files Affected:
diff --git a/flang/include/flang/Lower/Allocatable.h b/flang/include/flang/Lower/Allocatable.h
index e8738f0407e77..1209b157ed1f4 100644
--- a/flang/include/flang/Lower/Allocatable.h
+++ b/flang/include/flang/Lower/Allocatable.h
@@ -15,6 +15,7 @@
#include "flang/Lower/AbstractConverter.h"
#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Runtime/allocator-registry.h"
#include "llvm/ADT/StringRef.h"
namespace mlir {
@@ -70,7 +71,8 @@ void genDeallocateIfAllocated(AbstractConverter &converter,
fir::MutableBoxValue
createMutableBox(AbstractConverter &converter, mlir::Location loc,
const pft::Variable &var, mlir::Value boxAddr,
- mlir::ValueRange nonDeferredParams, bool alwaysUseBox);
+ mlir::ValueRange nonDeferredParams, bool alwaysUseBox,
+ unsigned allocator = kDefaultAllocator);
/// Assign a boxed value to a boxed variable, \p box (known as a
/// MutableBoxValue). Expression \p source will be lowered to build the
diff --git a/flang/include/flang/Optimizer/Builder/MutableBox.h b/flang/include/flang/Optimizer/Builder/MutableBox.h
index 0573d2b238c20..fea7c7204837b 100644
--- a/flang/include/flang/Optimizer/Builder/MutableBox.h
+++ b/flang/include/flang/Optimizer/Builder/MutableBox.h
@@ -14,6 +14,7 @@
#define FORTRAN_OPTIMIZER_BUILDER_MUTABLEBOX_H
#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Runtime/allocator-registry.h"
#include "llvm/ADT/StringRef.h"
namespace mlir {
@@ -43,7 +44,8 @@ namespace fir::factory {
mlir::Value createUnallocatedBox(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type boxType,
mlir::ValueRange nonDeferredParams,
- mlir::Value typeSourceBox = {});
+ mlir::Value typeSourceBox = {},
+ unsigned allocator = kDefaultAllocator);
/// Create a MutableBoxValue for a temporary allocatable.
/// The created MutableBoxValue wraps a fir.ref<fir.box<fir.heap<type>>> and is
@@ -80,7 +82,8 @@ void associateMutableBoxWithRemap(fir::FirOpBuilder &builder,
/// address field of the MutableBoxValue to zero.
void disassociateMutableBox(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
- bool polymorphicSetType = true);
+ bool polymorphicSetType = true,
+ unsigned allocator = kDefaultAllocator);
/// Generate code to conditionally reallocate a MutableBoxValue with a new
/// shape, lower bounds, and LEN parameters if it is unallocated or if its
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 9f6fb55bea744..46ff5dbe2f385 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -11,10 +11,6 @@
#include "flang/Runtime/descriptor.h"
-static constexpr unsigned kPinnedAllocatorPos = 1;
-static constexpr unsigned kDeviceAllocatorPos = 2;
-static constexpr unsigned kManagedAllocatorPos = 3;
-
#define CUDA_REPORT_IF_ERROR(expr) \
[](CUresult result) { \
if (!result) \
diff --git a/flang/include/flang/Runtime/allocator-registry.h b/flang/include/flang/Runtime/allocator-registry.h
index c481bec8e8e51..209b4d2e44e9b 100644
--- a/flang/include/flang/Runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -15,6 +15,11 @@
static constexpr unsigned kDefaultAllocator = 0;
+// Allocator used for CUF
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
#define MAX_ALLOCATOR 5
namespace Fortran::runtime {
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index f40c15db93db8..d4d999f5c84a0 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -1052,15 +1052,15 @@ createMutableProperties(Fortran::lower::AbstractConverter &converter,
fir::MutableBoxValue Fortran::lower::createMutableBox(
Fortran::lower::AbstractConverter &converter, mlir::Location loc,
const Fortran::lower::pft::Variable &var, mlir::Value boxAddr,
- mlir::ValueRange nonDeferredParams, bool alwaysUseBox) {
-
+ mlir::ValueRange nonDeferredParams, bool alwaysUseBox, unsigned allocator) {
fir::MutableProperties mutableProperties = createMutableProperties(
converter, loc, var, nonDeferredParams, alwaysUseBox);
fir::MutableBoxValue box(boxAddr, nonDeferredParams, mutableProperties);
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
if (!var.isGlobal() && !Fortran::semantics::IsDummy(var.getSymbol()))
fir::factory::disassociateMutableBox(builder, loc, box,
- /*polymorphicSetType=*/false);
+ /*polymorphicSetType=*/false,
+ allocator);
return box;
}
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 4fcfa0b126e04..45389091b8164 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -39,6 +39,7 @@
#include "flang/Optimizer/Support/FatalError.h"
#include "flang/Optimizer/Support/InternalNames.h"
#include "flang/Optimizer/Support/Utils.h"
+#include "flang/Runtime/allocator-registry.h"
#include "flang/Semantics/runtime-type-info.h"
#include "flang/Semantics/tools.h"
#include "llvm/Support/CommandLine.h"
@@ -1851,6 +1852,21 @@ static void genBoxDeclare(Fortran::lower::AbstractConverter &converter,
replace);
}
+static unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
+ std::optional<Fortran::common::CUDADataAttr> cudaAttr =
+ Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
+ if (cudaAttr) {
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned)
+ return kPinnedAllocatorPos;
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Device)
+ return kDeviceAllocatorPos;
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Managed ||
+ *cudaAttr == Fortran::common::CUDADataAttr::Unified)
+ return kManagedAllocatorPos;
+ }
+ return kDefaultAllocator;
+}
+
/// Lower specification expressions and attributes of variable \p var and
/// add it to the symbol map. For a global or an alias, the address must be
/// pre-computed and provided in \p preAlloc. A dummy argument for the current
@@ -1940,7 +1956,8 @@ void Fortran::lower::mapSymbolAttributes(
fir::MutableBoxValue box = Fortran::lower::createMutableBox(
converter, loc, var, boxAlloc, nonDeferredLenParams,
/*alwaysUseBox=*/
- converter.getLoweringOptions().getLowerToHighLevelFIR());
+ converter.getLoweringOptions().getLowerToHighLevelFIR(),
+ getAllocatorIdx(var.getSymbol()));
genAllocatableOrPointerDeclare(converter, symMap, var.getSymbol(), box,
replace);
return;
diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp
index fcb9ddcf41386..aeb737acbf567 100644
--- a/flang/lib/Optimizer/Builder/MutableBox.cpp
+++ b/flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -199,8 +199,9 @@ class MutablePropertyWriter {
public:
MutablePropertyWriter(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
- mlir::Value typeSourceBox = {})
- : builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox} {}
+ mlir::Value typeSourceBox = {}, unsigned allocator = 0)
+ : builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox},
+ allocator{allocator} {}
/// Update MutableBoxValue with new address, shape and length parameters.
/// Extents and lbounds must all have index type.
/// lbounds can be empty in which case all ones is assumed.
@@ -242,7 +243,7 @@ class MutablePropertyWriter {
// declared type, not retain the previous dynamic type.
auto deallocatedBox = fir::factory::createUnallocatedBox(
builder, loc, box.getBoxTy(), box.nonDeferredLenParams(),
- typeSourceBox);
+ typeSourceBox, allocator);
builder.create<fir::StoreOp>(loc, deallocatedBox, box.getAddr());
}
}
@@ -276,7 +277,8 @@ class MutablePropertyWriter {
/// Update the IR box (fir.ref<fir.box<T>>) of the MutableBoxValue.
void updateIRBox(mlir::Value addr, mlir::ValueRange lbounds,
mlir::ValueRange extents, mlir::ValueRange lengths,
- mlir::Value tdesc = {}) {
+ mlir::Value tdesc = {},
+ unsigned allocator = kDefaultAllocator) {
mlir::Value irBox = createNewFirBox(builder, loc, box, addr, lbounds,
extents, lengths, tdesc);
builder.create<fir::StoreOp>(loc, irBox, box.getAddr());
@@ -322,13 +324,15 @@ class MutablePropertyWriter {
mlir::Location loc;
fir::MutableBoxValue box;
mlir::Value typeSourceBox;
+ unsigned allocator;
};
} // namespace
mlir::Value fir::factory::createUnallocatedBox(
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type boxType,
- mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox) {
+ mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox,
+ unsigned allocator) {
auto baseBoxType = mlir::cast<fir::BaseBoxType>(boxType);
// Giving unallocated/disassociated status to assumed-rank POINTER/
// ALLOCATABLE is not directly possible to a Fortran user. But the
@@ -374,6 +378,8 @@ mlir::Value fir::factory::createUnallocatedBox(
mlir::Value emptySlice;
auto embox = builder.create<fir::EmboxOp>(
loc, baseBoxType, nullAddr, shape, emptySlice, lenParams, typeSourceBox);
+ if (allocator != 0)
+ embox.setAllocatorIdx(allocator);
if (isAssumedRank)
return builder.createConvert(loc, boxType, embox);
return embox;
@@ -691,7 +697,8 @@ void fir::factory::associateMutableBoxWithRemap(
void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
mlir::Location loc,
const fir::MutableBoxValue &box,
- bool polymorphicSetType) {
+ bool polymorphicSetType,
+ unsigned allocator) {
if (box.isPolymorphic() && polymorphicSetType) {
// 7.3.2.3 point 7. The dynamic type of a disassociated pointer is the
// same as its declared type.
@@ -704,7 +711,8 @@ void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
return;
}
}
- MutablePropertyWriter{builder, loc, box}.setUnallocatedStatus();
+ MutablePropertyWriter{builder, loc, box, {}, allocator}
+ .setUnallocatedStatus();
}
static llvm::SmallVector<mlir::Value>
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index 02eaba5636990..26a3c29696269 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -7,13 +7,13 @@
//===----------------------------------------------------------------------===//
#include "flang/Runtime/CUDA/allocator.h"
-#include "../allocator-registry.h"
#include "../derived.h"
#include "../stat.h"
#include "../terminator.h"
#include "../type-info.h"
#include "flang/Common/Fortran.h"
#include "flang/ISO_Fortran_binding_wrapper.h"
+#include "flang/Runtime/allocator-registry.h"
#include "cuda.h"
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 82c1063507535..cb6ca9af334fc 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -11,6 +11,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
@@ -37,6 +38,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub2()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub2Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -60,6 +62,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub3()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub3Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
@@ -78,6 +81,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub4()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -97,6 +101,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub5()
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub5Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -118,6 +123,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub6()
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub6Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -140,6 +146,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub7()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub7Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 2a7c7fe25de85..f372ae18c202f 100644
--- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -11,6 +11,7 @@
#include "flang/Common/Fortran.h"
#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
#include "cuda.h"
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch set the
allocator_idx
attribute for allocatable descriptor that have specific CUDA attribute.