Skip to content

Commit bbdb1e4

Browse files
authored
[flang][cuda] Set the allocator on fir.embox operation (#101722)
This patch sets the `allocator_idx` attribute on the `fir.embox` operation for allocatable descriptors that have a specific CUDA data attribute.
1 parent a21fc4c commit bbdb1e4

File tree

9 files changed

+57
-18
lines changed

9 files changed

+57
-18
lines changed

flang/include/flang/Lower/Allocatable.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "flang/Lower/AbstractConverter.h"
1717
#include "flang/Optimizer/Builder/MutableBox.h"
18+
#include "flang/Runtime/allocator-registry.h"
1819
#include "llvm/ADT/StringRef.h"
1920

2021
namespace mlir {
@@ -70,7 +71,8 @@ void genDeallocateIfAllocated(AbstractConverter &converter,
7071
fir::MutableBoxValue
7172
createMutableBox(AbstractConverter &converter, mlir::Location loc,
7273
const pft::Variable &var, mlir::Value boxAddr,
73-
mlir::ValueRange nonDeferredParams, bool alwaysUseBox);
74+
mlir::ValueRange nonDeferredParams, bool alwaysUseBox,
75+
unsigned allocator = kDefaultAllocator);
7476

7577
/// Assign a boxed value to a boxed variable, \p box (known as a
7678
/// MutableBoxValue). Expression \p source will be lowered to build the

flang/include/flang/Optimizer/Builder/MutableBox.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define FORTRAN_OPTIMIZER_BUILDER_MUTABLEBOX_H
1515

1616
#include "flang/Optimizer/Builder/BoxValue.h"
17+
#include "flang/Runtime/allocator-registry.h"
1718
#include "llvm/ADT/StringRef.h"
1819

1920
namespace mlir {
@@ -43,7 +44,8 @@ namespace fir::factory {
4344
mlir::Value createUnallocatedBox(fir::FirOpBuilder &builder, mlir::Location loc,
4445
mlir::Type boxType,
4546
mlir::ValueRange nonDeferredParams,
46-
mlir::Value typeSourceBox = {});
47+
mlir::Value typeSourceBox = {},
48+
unsigned allocator = kDefaultAllocator);
4749

4850
/// Create a MutableBoxValue for a temporary allocatable.
4951
/// The created MutableBoxValue wraps a fir.ref<fir.box<fir.heap<type>>> and is
@@ -80,7 +82,8 @@ void associateMutableBoxWithRemap(fir::FirOpBuilder &builder,
8082
/// address field of the MutableBoxValue to zero.
8183
void disassociateMutableBox(fir::FirOpBuilder &builder, mlir::Location loc,
8284
const fir::MutableBoxValue &box,
83-
bool polymorphicSetType = true);
85+
bool polymorphicSetType = true,
86+
unsigned allocator = kDefaultAllocator);
8487

8588
/// Generate code to conditionally reallocate a MutableBoxValue with a new
8689
/// shape, lower bounds, and LEN parameters if it is unallocated or if its

flang/include/flang/Runtime/CUDA/allocator.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,6 @@
1111

1212
#include "flang/Runtime/descriptor.h"
1313

14-
static constexpr unsigned kPinnedAllocatorPos = 1;
15-
static constexpr unsigned kDeviceAllocatorPos = 2;
16-
static constexpr unsigned kManagedAllocatorPos = 3;
17-
1814
#define CUDA_REPORT_IF_ERROR(expr) \
1915
[](CUresult result) { \
2016
if (!result) \

flang/include/flang/Runtime/allocator-registry.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515

1616
static constexpr unsigned kDefaultAllocator = 0;
1717

18+
// Allocator used for CUF
19+
static constexpr unsigned kPinnedAllocatorPos = 1;
20+
static constexpr unsigned kDeviceAllocatorPos = 2;
21+
static constexpr unsigned kManagedAllocatorPos = 3;
22+
1823
#define MAX_ALLOCATOR 5
1924

2025
namespace Fortran::runtime {

flang/lib/Lower/Allocatable.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,15 +1052,15 @@ createMutableProperties(Fortran::lower::AbstractConverter &converter,
10521052
fir::MutableBoxValue Fortran::lower::createMutableBox(
10531053
Fortran::lower::AbstractConverter &converter, mlir::Location loc,
10541054
const Fortran::lower::pft::Variable &var, mlir::Value boxAddr,
1055-
mlir::ValueRange nonDeferredParams, bool alwaysUseBox) {
1056-
1055+
mlir::ValueRange nonDeferredParams, bool alwaysUseBox, unsigned allocator) {
10571056
fir::MutableProperties mutableProperties = createMutableProperties(
10581057
converter, loc, var, nonDeferredParams, alwaysUseBox);
10591058
fir::MutableBoxValue box(boxAddr, nonDeferredParams, mutableProperties);
10601059
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
10611060
if (!var.isGlobal() && !Fortran::semantics::IsDummy(var.getSymbol()))
10621061
fir::factory::disassociateMutableBox(builder, loc, box,
1063-
/*polymorphicSetType=*/false);
1062+
/*polymorphicSetType=*/false,
1063+
allocator);
10641064
return box;
10651065
}
10661066

flang/lib/Lower/ConvertVariable.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "flang/Optimizer/Support/FatalError.h"
4040
#include "flang/Optimizer/Support/InternalNames.h"
4141
#include "flang/Optimizer/Support/Utils.h"
42+
#include "flang/Runtime/allocator-registry.h"
4243
#include "flang/Semantics/runtime-type-info.h"
4344
#include "flang/Semantics/tools.h"
4445
#include "llvm/Support/CommandLine.h"
@@ -1851,6 +1852,21 @@ static void genBoxDeclare(Fortran::lower::AbstractConverter &converter,
18511852
replace);
18521853
}
18531854

1855+
static unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
1856+
std::optional<Fortran::common::CUDADataAttr> cudaAttr =
1857+
Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
1858+
if (cudaAttr) {
1859+
if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned)
1860+
return kPinnedAllocatorPos;
1861+
if (*cudaAttr == Fortran::common::CUDADataAttr::Device)
1862+
return kDeviceAllocatorPos;
1863+
if (*cudaAttr == Fortran::common::CUDADataAttr::Managed ||
1864+
*cudaAttr == Fortran::common::CUDADataAttr::Unified)
1865+
return kManagedAllocatorPos;
1866+
}
1867+
return kDefaultAllocator;
1868+
}
1869+
18541870
/// Lower specification expressions and attributes of variable \p var and
18551871
/// add it to the symbol map. For a global or an alias, the address must be
18561872
/// pre-computed and provided in \p preAlloc. A dummy argument for the current
@@ -1940,7 +1956,8 @@ void Fortran::lower::mapSymbolAttributes(
19401956
fir::MutableBoxValue box = Fortran::lower::createMutableBox(
19411957
converter, loc, var, boxAlloc, nonDeferredLenParams,
19421958
/*alwaysUseBox=*/
1943-
converter.getLoweringOptions().getLowerToHighLevelFIR());
1959+
converter.getLoweringOptions().getLowerToHighLevelFIR(),
1960+
getAllocatorIdx(var.getSymbol()));
19441961
genAllocatableOrPointerDeclare(converter, symMap, var.getSymbol(), box,
19451962
replace);
19461963
return;

flang/lib/Optimizer/Builder/MutableBox.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,9 @@ class MutablePropertyWriter {
199199
public:
200200
MutablePropertyWriter(fir::FirOpBuilder &builder, mlir::Location loc,
201201
const fir::MutableBoxValue &box,
202-
mlir::Value typeSourceBox = {})
203-
: builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox} {}
202+
mlir::Value typeSourceBox = {}, unsigned allocator = 0)
203+
: builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox},
204+
allocator{allocator} {}
204205
/// Update MutableBoxValue with new address, shape and length parameters.
205206
/// Extents and lbounds must all have index type.
206207
/// lbounds can be empty in which case all ones is assumed.
@@ -242,7 +243,7 @@ class MutablePropertyWriter {
242243
// declared type, not retain the previous dynamic type.
243244
auto deallocatedBox = fir::factory::createUnallocatedBox(
244245
builder, loc, box.getBoxTy(), box.nonDeferredLenParams(),
245-
typeSourceBox);
246+
typeSourceBox, allocator);
246247
builder.create<fir::StoreOp>(loc, deallocatedBox, box.getAddr());
247248
}
248249
}
@@ -276,7 +277,8 @@ class MutablePropertyWriter {
276277
/// Update the IR box (fir.ref<fir.box<T>>) of the MutableBoxValue.
277278
void updateIRBox(mlir::Value addr, mlir::ValueRange lbounds,
278279
mlir::ValueRange extents, mlir::ValueRange lengths,
279-
mlir::Value tdesc = {}) {
280+
mlir::Value tdesc = {},
281+
unsigned allocator = kDefaultAllocator) {
280282
mlir::Value irBox = createNewFirBox(builder, loc, box, addr, lbounds,
281283
extents, lengths, tdesc);
282284
builder.create<fir::StoreOp>(loc, irBox, box.getAddr());
@@ -322,13 +324,15 @@ class MutablePropertyWriter {
322324
mlir::Location loc;
323325
fir::MutableBoxValue box;
324326
mlir::Value typeSourceBox;
327+
unsigned allocator;
325328
};
326329

327330
} // namespace
328331

329332
mlir::Value fir::factory::createUnallocatedBox(
330333
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type boxType,
331-
mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox) {
334+
mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox,
335+
unsigned allocator) {
332336
auto baseBoxType = mlir::cast<fir::BaseBoxType>(boxType);
333337
// Giving unallocated/disassociated status to assumed-rank POINTER/
334338
// ALLOCATABLE is not directly possible to a Fortran user. But the
@@ -374,6 +378,8 @@ mlir::Value fir::factory::createUnallocatedBox(
374378
mlir::Value emptySlice;
375379
auto embox = builder.create<fir::EmboxOp>(
376380
loc, baseBoxType, nullAddr, shape, emptySlice, lenParams, typeSourceBox);
381+
if (allocator != 0)
382+
embox.setAllocatorIdx(allocator);
377383
if (isAssumedRank)
378384
return builder.createConvert(loc, boxType, embox);
379385
return embox;
@@ -691,7 +697,8 @@ void fir::factory::associateMutableBoxWithRemap(
691697
void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
692698
mlir::Location loc,
693699
const fir::MutableBoxValue &box,
694-
bool polymorphicSetType) {
700+
bool polymorphicSetType,
701+
unsigned allocator) {
695702
if (box.isPolymorphic() && polymorphicSetType) {
696703
// 7.3.2.3 point 7. The dynamic type of a disassociated pointer is the
697704
// same as its declared type.
@@ -704,7 +711,8 @@ void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
704711
return;
705712
}
706713
}
707-
MutablePropertyWriter{builder, loc, box}.setUnallocatedStatus();
714+
MutablePropertyWriter{builder, loc, box, {}, allocator}
715+
.setUnallocatedStatus();
708716
}
709717

710718
static llvm::SmallVector<mlir::Value>

flang/test/Lower/CUDA/cuda-allocatable.cuf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ end subroutine
1111

1212
! CHECK-LABEL: func.func @_QPsub1()
1313
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
14+
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
1415
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
1516
! CHECK: fir.call @_FortranAAllocatableSetBounds
1617
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
@@ -37,6 +38,7 @@ end subroutine
3738

3839
! CHECK-LABEL: func.func @_QPsub2()
3940
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub2Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
41+
! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
4042
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
4143
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
4244
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -60,6 +62,7 @@ end subroutine
6062

6163
! CHECK-LABEL: func.func @_QPsub3()
6264
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub3Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
65+
! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
6366
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
6467
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
6568
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
@@ -78,6 +81,7 @@ end subroutine
7881

7982
! CHECK-LABEL: func.func @_QPsub4()
8083
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
84+
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
8185
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
8286
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
8387
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -97,6 +101,7 @@ end subroutine
97101

98102
! CHECK-LABEL: func.func @_QPsub5()
99103
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub5Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
104+
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
100105
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
101106
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
102107
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -118,6 +123,7 @@ end subroutine
118123

119124
! CHECK-LABEL: func.func @_QPsub6()
120125
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub6Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
126+
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
121127
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
122128
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
123129
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
@@ -140,6 +146,7 @@ end subroutine
140146

141147
! CHECK-LABEL: func.func @_QPsub7()
142148
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub7Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
149+
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
143150
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
144151
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
145152
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)

flang/unittests/Runtime/CUDA/AllocatorCUF.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "flang/Common/Fortran.h"
1212
#include "flang/Runtime/CUDA/allocator.h"
1313
#include "flang/Runtime/allocatable.h"
14+
#include "flang/Runtime/allocator-registry.h"
1415

1516
#include "cuda.h"
1617

0 commit comments

Comments
 (0)