Skip to content

[flang][cuda] Set the allocator on fir.embox operation #101722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion flang/include/flang/Lower/Allocatable.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "flang/Lower/AbstractConverter.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "flang/Runtime/allocator-registry.h"
#include "llvm/ADT/StringRef.h"

namespace mlir {
Expand Down Expand Up @@ -70,7 +71,8 @@ void genDeallocateIfAllocated(AbstractConverter &converter,
fir::MutableBoxValue
createMutableBox(AbstractConverter &converter, mlir::Location loc,
const pft::Variable &var, mlir::Value boxAddr,
mlir::ValueRange nonDeferredParams, bool alwaysUseBox);
mlir::ValueRange nonDeferredParams, bool alwaysUseBox,
unsigned allocator = kDefaultAllocator);

/// Assign a boxed value to a boxed variable, \p box (known as a
/// MutableBoxValue). Expression \p source will be lowered to build the
Expand Down
7 changes: 5 additions & 2 deletions flang/include/flang/Optimizer/Builder/MutableBox.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define FORTRAN_OPTIMIZER_BUILDER_MUTABLEBOX_H

#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Runtime/allocator-registry.h"
#include "llvm/ADT/StringRef.h"

namespace mlir {
Expand Down Expand Up @@ -43,7 +44,8 @@ namespace fir::factory {
mlir::Value createUnallocatedBox(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type boxType,
mlir::ValueRange nonDeferredParams,
mlir::Value typeSourceBox = {});
mlir::Value typeSourceBox = {},
unsigned allocator = kDefaultAllocator);

/// Create a MutableBoxValue for a temporary allocatable.
/// The created MutableBoxValue wraps a fir.ref<fir.box<fir.heap<type>>> and is
Expand Down Expand Up @@ -80,7 +82,8 @@ void associateMutableBoxWithRemap(fir::FirOpBuilder &builder,
/// address field of the MutableBoxValue to zero.
void disassociateMutableBox(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
bool polymorphicSetType = true);
bool polymorphicSetType = true,
unsigned allocator = kDefaultAllocator);

/// Generate code to conditionally reallocate a MutableBoxValue with a new
/// shape, lower bounds, and LEN parameters if it is unallocated or if its
Expand Down
4 changes: 0 additions & 4 deletions flang/include/flang/Runtime/CUDA/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@

#include "flang/Runtime/descriptor.h"

static constexpr unsigned kPinnedAllocatorPos = 1;
static constexpr unsigned kDeviceAllocatorPos = 2;
static constexpr unsigned kManagedAllocatorPos = 3;

#define CUDA_REPORT_IF_ERROR(expr) \
[](CUresult result) { \
if (!result) \
Expand Down
5 changes: 5 additions & 0 deletions flang/include/flang/Runtime/allocator-registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@

// Allocator index used when no specific allocator is requested.
static constexpr unsigned kDefaultAllocator = 0;

// Allocator indices used for CUDA Fortran (CUF) pinned, device and managed
// data.
static constexpr unsigned kPinnedAllocatorPos = 1;
static constexpr unsigned kDeviceAllocatorPos = 2;
static constexpr unsigned kManagedAllocatorPos = 3;

// NOTE(review): presumably the capacity of the allocator registry, in which
// case the indices above must stay below it -- confirm against the registry.
#define MAX_ALLOCATOR 5

namespace Fortran::runtime {
Expand Down
6 changes: 3 additions & 3 deletions flang/lib/Lower/Allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1052,15 +1052,15 @@ createMutableProperties(Fortran::lower::AbstractConverter &converter,
fir::MutableBoxValue Fortran::lower::createMutableBox(
Fortran::lower::AbstractConverter &converter, mlir::Location loc,
const Fortran::lower::pft::Variable &var, mlir::Value boxAddr,
mlir::ValueRange nonDeferredParams, bool alwaysUseBox) {

mlir::ValueRange nonDeferredParams, bool alwaysUseBox, unsigned allocator) {
fir::MutableProperties mutableProperties = createMutableProperties(
converter, loc, var, nonDeferredParams, alwaysUseBox);
fir::MutableBoxValue box(boxAddr, nonDeferredParams, mutableProperties);
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
if (!var.isGlobal() && !Fortran::semantics::IsDummy(var.getSymbol()))
fir::factory::disassociateMutableBox(builder, loc, box,
/*polymorphicSetType=*/false);
/*polymorphicSetType=*/false,
allocator);
return box;
}

Expand Down
19 changes: 18 additions & 1 deletion flang/lib/Lower/ConvertVariable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "flang/Optimizer/Support/FatalError.h"
#include "flang/Optimizer/Support/InternalNames.h"
#include "flang/Optimizer/Support/Utils.h"
#include "flang/Runtime/allocator-registry.h"
#include "flang/Semantics/runtime-type-info.h"
#include "flang/Semantics/tools.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -1851,6 +1852,21 @@ static void genBoxDeclare(Fortran::lower::AbstractConverter &converter,
replace);
}

/// Return the allocator index to use for \p sym. The CUDA data attribute of
/// the ultimate symbol selects the pinned, device or managed allocator
/// position (unified data shares the managed allocator); a symbol with no
/// CUDA data attribute, or with any other one, gets kDefaultAllocator.
static unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
  const std::optional<Fortran::common::CUDADataAttr> dataAttr =
      Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
  // No CUDA data attribute at all: keep the default allocator.
  if (!dataAttr)
    return kDefaultAllocator;
  switch (*dataAttr) {
  case Fortran::common::CUDADataAttr::Pinned:
    return kPinnedAllocatorPos;
  case Fortran::common::CUDADataAttr::Device:
    return kDeviceAllocatorPos;
  case Fortran::common::CUDADataAttr::Managed:
  case Fortran::common::CUDADataAttr::Unified:
    return kManagedAllocatorPos;
  default:
    // Remaining CUDA data attributes do not select a dedicated allocator.
    return kDefaultAllocator;
  }
}

/// Lower specification expressions and attributes of variable \p var and
/// add it to the symbol map. For a global or an alias, the address must be
/// pre-computed and provided in \p preAlloc. A dummy argument for the current
Expand Down Expand Up @@ -1940,7 +1956,8 @@ void Fortran::lower::mapSymbolAttributes(
fir::MutableBoxValue box = Fortran::lower::createMutableBox(
converter, loc, var, boxAlloc, nonDeferredLenParams,
/*alwaysUseBox=*/
converter.getLoweringOptions().getLowerToHighLevelFIR());
converter.getLoweringOptions().getLowerToHighLevelFIR(),
getAllocatorIdx(var.getSymbol()));
genAllocatableOrPointerDeclare(converter, symMap, var.getSymbol(), box,
replace);
return;
Expand Down
22 changes: 15 additions & 7 deletions flang/lib/Optimizer/Builder/MutableBox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,9 @@ class MutablePropertyWriter {
public:
MutablePropertyWriter(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
mlir::Value typeSourceBox = {})
: builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox} {}
mlir::Value typeSourceBox = {}, unsigned allocator = 0)
: builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox},
allocator{allocator} {}
/// Update MutableBoxValue with new address, shape and length parameters.
/// Extents and lbounds must all have index type.
/// lbounds can be empty in which case all ones is assumed.
Expand Down Expand Up @@ -242,7 +243,7 @@ class MutablePropertyWriter {
// declared type, not retain the previous dynamic type.
auto deallocatedBox = fir::factory::createUnallocatedBox(
builder, loc, box.getBoxTy(), box.nonDeferredLenParams(),
typeSourceBox);
typeSourceBox, allocator);
builder.create<fir::StoreOp>(loc, deallocatedBox, box.getAddr());
}
}
Expand Down Expand Up @@ -276,7 +277,8 @@ class MutablePropertyWriter {
/// Update the IR box (fir.ref<fir.box<T>>) of the MutableBoxValue.
void updateIRBox(mlir::Value addr, mlir::ValueRange lbounds,
mlir::ValueRange extents, mlir::ValueRange lengths,
mlir::Value tdesc = {}) {
mlir::Value tdesc = {},
unsigned allocator = kDefaultAllocator) {
mlir::Value irBox = createNewFirBox(builder, loc, box, addr, lbounds,
extents, lengths, tdesc);
builder.create<fir::StoreOp>(loc, irBox, box.getAddr());
Expand Down Expand Up @@ -322,13 +324,15 @@ class MutablePropertyWriter {
mlir::Location loc;
fir::MutableBoxValue box;
mlir::Value typeSourceBox;
unsigned allocator;
};

} // namespace

mlir::Value fir::factory::createUnallocatedBox(
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type boxType,
mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox) {
mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox,
unsigned allocator) {
auto baseBoxType = mlir::cast<fir::BaseBoxType>(boxType);
// Giving unallocated/disassociated status to assumed-rank POINTER/
// ALLOCATABLE is not directly possible to a Fortran user. But the
Expand Down Expand Up @@ -374,6 +378,8 @@ mlir::Value fir::factory::createUnallocatedBox(
mlir::Value emptySlice;
auto embox = builder.create<fir::EmboxOp>(
loc, baseBoxType, nullAddr, shape, emptySlice, lenParams, typeSourceBox);
if (allocator != 0)
embox.setAllocatorIdx(allocator);
if (isAssumedRank)
return builder.createConvert(loc, boxType, embox);
return embox;
Expand Down Expand Up @@ -691,7 +697,8 @@ void fir::factory::associateMutableBoxWithRemap(
void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
mlir::Location loc,
const fir::MutableBoxValue &box,
bool polymorphicSetType) {
bool polymorphicSetType,
unsigned allocator) {
if (box.isPolymorphic() && polymorphicSetType) {
// 7.3.2.3 point 7. The dynamic type of a disassociated pointer is the
// same as its declared type.
Expand All @@ -704,7 +711,8 @@ void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
return;
}
}
MutablePropertyWriter{builder, loc, box}.setUnallocatedStatus();
MutablePropertyWriter{builder, loc, box, {}, allocator}
.setUnallocatedStatus();
}

static llvm::SmallVector<mlir::Value>
Expand Down
2 changes: 1 addition & 1 deletion flang/runtime/CUDA/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
//===----------------------------------------------------------------------===//

#include "flang/Runtime/CUDA/allocator.h"
#include "../allocator-registry.h"
#include "../derived.h"
#include "../stat.h"
#include "../terminator.h"
#include "../type-info.h"
#include "flang/Common/Fortran.h"
#include "flang/ISO_Fortran_binding_wrapper.h"
#include "flang/Runtime/allocator-registry.h"

#include "cuda.h"

Expand Down
7 changes: 7 additions & 0 deletions flang/test/Lower/CUDA/cuda-allocatable.cuf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
Expand All @@ -37,6 +38,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub2()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub2Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
Expand All @@ -60,6 +62,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub3()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub3Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %[[PLOG]] {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
Expand All @@ -78,6 +81,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub4()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
Expand All @@ -97,6 +101,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub5()
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub5Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX_A]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
Expand All @@ -118,6 +123,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub6()
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub6Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX_A]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
Expand All @@ -140,6 +146,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub7()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub7Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
Expand Down
1 change: 1 addition & 0 deletions flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "flang/Common/Fortran.h"
#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/allocatable.h"
#include "flang/Runtime/allocator-registry.h"

#include "cuda.h"

Expand Down
Loading