Skip to content

Commit 6df4e7c

Browse files
clementval and vzakhari authored
[flang] Add ability to have special allocator for descriptor data (#100690)
This patch enhances the descriptor with the ability to use a specialized allocator. The allocators are registered in a dedicated registry, and the index of the desired allocator is stored in the descriptor. The default allocator, std::malloc, is registered at index 0. In order to store this allocator index in the descriptor, the f18Addendum field is repurposed to hold both the presence flag for the addendum (the least significant bit) and the allocator index. Since this changes the semantics and the name of the 7th field of the descriptor, CFI_VERSION is bumped to the date of the initial change. This patch only adds the ability to have this feature as part of the descriptor; it does not add any specific allocator yet. CUDA Fortran will be the first user of this feature, allocating descriptor data in the different types of device memory based on the CUDA attribute. --------- Co-authored-by: Slava Zakharin <[email protected]>
1 parent 2177a17 commit 6df4e7c

22 files changed

+187
-57
lines changed

flang/include/flang/ISO_Fortran_binding.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
#endif
3131

3232
/* 18.5.4 */
33-
#define CFI_VERSION 20180515
33+
#define CFI_VERSION 20240719
3434

3535
#define CFI_MAX_RANK 15
3636
typedef unsigned char CFI_rank_t;
@@ -146,7 +146,9 @@ extern "C++" template <typename T> struct FlexibleArray : T {
146146
CFI_rank_t rank; /* [0 .. CFI_MAX_RANK] */ \
147147
CFI_type_t type; \
148148
CFI_attribute_t attribute; \
149-
unsigned char f18Addendum;
149+
/* This encodes both the presence of the f18Addendum and the index of the \
150+
 * allocator used to manage the memory of the data held by the descriptor. */ \
151+
unsigned char extra;
150152

151153
typedef struct CFI_cdesc_t {
152154
_CFI_CDESC_T_HEADER_MEMBERS

flang/include/flang/Optimizer/CodeGen/TBAABuilder.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ namespace fir {
5555
// <@type_desc_3, 20>, // rank
5656
// <@type_desc_3, 21>, // type
5757
// <@type_desc_3, 22>, // attribute
58-
// <@type_desc_3, 23>} // f18Addendum
58+
// <@type_desc_3, 23>} // extra
5959
// }
6060
// llvm.tbaa_type_desc @type_desc_5 {
6161
// id = "CFI_cdesc_t_dim1",
@@ -65,7 +65,7 @@ namespace fir {
6565
// <@type_desc_3, 20>, // rank
6666
// <@type_desc_3, 21>, // type
6767
// <@type_desc_3, 22>, // attribute
68-
// <@type_desc_3, 23>, // f18Addendum
68+
// <@type_desc_3, 23>, // extra
6969
// <@type_desc_3, 24>, // dim[0].lower_bound
7070
// <@type_desc_3, 32>, // dim[0].extent
7171
// <@type_desc_3, 40>} // dim[0].sm
@@ -78,7 +78,7 @@ namespace fir {
7878
// <@type_desc_3, 20>, // rank
7979
// <@type_desc_3, 21>, // type
8080
// <@type_desc_3, 22>, // attribute
81-
// <@type_desc_3, 23>, // f18Addendum
81+
// <@type_desc_3, 23>, // extra
8282
// <@type_desc_3, 24>, // dim[0].lower_bound
8383
// <@type_desc_3, 32>, // dim[0].extent
8484
// <@type_desc_3, 40>, // dim[0].sm

flang/include/flang/Optimizer/CodeGen/TypeConverter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static constexpr unsigned kVersionPosInBox = 2;
2929
static constexpr unsigned kRankPosInBox = 3;
3030
static constexpr unsigned kTypePosInBox = 4;
3131
static constexpr unsigned kAttributePosInBox = 5;
32-
static constexpr unsigned kF18AddendumPosInBox = 6;
32+
static constexpr unsigned kExtraPosInBox = 6;
3333
static constexpr unsigned kDimsPosInBox = 7;
3434
static constexpr unsigned kOptTypePtrPosInBox = 8;
3535
static constexpr unsigned kOptRowTypePosInBox = 9;

flang/include/flang/Runtime/descriptor.h

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ class Dimension {
9292
// The storage for this object follows the last used dim[] entry in a
9393
// Descriptor (CFI_cdesc_t) generic descriptor. Space matters here, since
9494
// descriptors serve as POINTER and ALLOCATABLE components of derived type
95-
// instances. The presence of this structure is implied by the flag
96-
// CFI_cdesc_t.f18Addendum, and the number of elements in the len_[]
95+
// instances. The presence of this structure is encoded in the
96+
// CFI_cdesc_t.extra field, and the number of elements in the len_[]
9797
// array is determined by derivedType_->LenParameters().
9898
class DescriptorAddendum {
9999
public:
@@ -339,14 +339,14 @@ class Descriptor {
339339
const SubscriptValue *, const int *permutation = nullptr) const;
340340

341341
RT_API_ATTRS DescriptorAddendum *Addendum() {
342-
if (raw_.f18Addendum != 0) {
342+
if (HasAddendum()) {
343343
return reinterpret_cast<DescriptorAddendum *>(&GetDimension(rank()));
344344
} else {
345345
return nullptr;
346346
}
347347
}
348348
RT_API_ATTRS const DescriptorAddendum *Addendum() const {
349-
if (raw_.f18Addendum != 0) {
349+
if (HasAddendum()) {
350350
return reinterpret_cast<const DescriptorAddendum *>(
351351
&GetDimension(rank()));
352352
} else {
@@ -420,6 +420,27 @@ class Descriptor {
420420

421421
void Dump(FILE * = stdout) const;
422422

423+
// Value of the addendum presence flag.
424+
#define _CFI_ADDENDUM_FLAG 1
425+
// Number of bits needed to be shifted when manipulating the allocator index.
426+
#define _CFI_ALLOCATOR_IDX_SHIFT 1
427+
// Allocator index mask.
428+
#define _CFI_ALLOCATOR_IDX_MASK 0b00001110
429+
430+
// Reports whether the addendum presence flag (_CFI_ADDENDUM_FLAG) is set in
// raw_.extra.
RT_API_ATTRS inline bool HasAddendum() const {
  return (raw_.extra & _CFI_ADDENDUM_FLAG) != 0;
}
433+
// Sets the addendum presence flag in raw_.extra; all other bits of the field
// keep their current value.
RT_API_ATTRS inline void SetHasAddendum() {
  raw_.extra = static_cast<unsigned char>(raw_.extra | _CFI_ADDENDUM_FLAG);
}
436+
// Returns the allocator index stored in raw_.extra: the bits selected by
// _CFI_ALLOCATOR_IDX_MASK, moved down by _CFI_ALLOCATOR_IDX_SHIFT.
RT_API_ATTRS inline int GetAllocIdx() const {
  const int allocBits{raw_.extra & _CFI_ALLOCATOR_IDX_MASK};
  return allocBits >> _CFI_ALLOCATOR_IDX_SHIFT;
}
439+
// Records `pos` as the allocator index of this descriptor. Only the bits
// selected by _CFI_ALLOCATOR_IDX_MASK are rewritten; every other bit of
// raw_.extra, including the addendum presence flag, keeps its value.
// An index that does not fit in the field is truncated to the field width.
RT_API_ATTRS inline void SetAllocIdx(int pos) {
  // Shift as unsigned (left-shifting a negative int is undefined before
  // C++20) and mask the result so that an index too large for the field
  // cannot leak into the bits above the allocator index.
  const unsigned idxBits{
      (static_cast<unsigned>(pos) << _CFI_ALLOCATOR_IDX_SHIFT) &
      static_cast<unsigned>(_CFI_ALLOCATOR_IDX_MASK)};
  // Everything outside of the allocator index field is preserved as is.
  const unsigned keptBits{
      raw_.extra & ~static_cast<unsigned>(_CFI_ALLOCATOR_IDX_MASK)};
  raw_.extra = static_cast<unsigned char>(keptBits | idxBits);
}
443+
423444
private:
424445
ISO::CFI_cdesc_t raw_;
425446
};

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1241,9 +1241,10 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
12411241
descriptor =
12421242
insertField(rewriter, loc, descriptor, {kAttributePosInBox},
12431243
this->genI32Constant(loc, rewriter, getCFIAttr(boxTy)));
1244+
12441245
const bool hasAddendum = fir::boxHasAddendum(boxTy);
12451246
descriptor =
1246-
insertField(rewriter, loc, descriptor, {kF18AddendumPosInBox},
1247+
insertField(rewriter, loc, descriptor, {kExtraPosInBox},
12471248
this->genI32Constant(loc, rewriter, hasAddendum ? 1 : 0));
12481249

12491250
if (hasAddendum) {

flang/lib/Optimizer/CodeGen/TypeConverter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ bool LLVMTypeConverter::requiresExtendedDesc(mlir::Type boxElementType) const {
177177
// the addendum defined in descriptor.h.
178178
mlir::Type LLVMTypeConverter::convertBoxTypeAsStruct(BaseBoxType box,
179179
int rank) const {
180-
// (base_addr*, elem_len, version, rank, type, attribute, f18Addendum, [dim]
180+
// (base_addr*, elem_len, version, rank, type, attribute, extra, [dim]
181181
llvm::SmallVector<mlir::Type> dataDescFields;
182182
mlir::Type ele = box.getEleTy();
183183
// remove fir.heap/fir.ref/fir.ptr
@@ -206,9 +206,9 @@ mlir::Type LLVMTypeConverter::convertBoxTypeAsStruct(BaseBoxType box,
206206
// attribute
207207
dataDescFields.push_back(
208208
getDescFieldTypeModel<kAttributePosInBox>()(&getContext()));
209-
// f18Addendum
209+
// extra
210210
dataDescFields.push_back(
211-
getDescFieldTypeModel<kF18AddendumPosInBox>()(&getContext()));
211+
getDescFieldTypeModel<kExtraPosInBox>()(&getContext()));
212212
// [dims]
213213
if (rank == unknownRank()) {
214214
if (auto seqTy = mlir::dyn_cast<SequenceType>(ele))

flang/runtime/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ add_subdirectory(Float128Math)
107107

108108
set(sources
109109
ISO_Fortran_binding.cpp
110+
allocator-registry.cpp
110111
allocatable.cpp
111112
array-constructor.cpp
112113
assign.cpp
@@ -178,6 +179,7 @@ include(AddFlangOffloadRuntime)
178179
set(supported_files
179180
ISO_Fortran_binding.cpp
180181
allocatable.cpp
182+
allocator-registry.cpp
181183
array-constructor.cpp
182184
assign.cpp
183185
buffer.cpp

flang/runtime/ISO_Fortran_util.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ static inline RT_API_ATTRS void EstablishDescriptor(CFI_cdesc_t *descriptor,
8686
descriptor->rank = rank;
8787
descriptor->type = type;
8888
descriptor->attribute = attribute;
89-
descriptor->f18Addendum = 0;
89+
descriptor->extra = 0;
9090
std::size_t byteSize{elem_len};
9191
constexpr std::size_t lower_bound{0};
9292
if (base_addr) {

flang/runtime/allocator-registry.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//===-- runtime/allocator-registry.cpp ------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "allocator-registry.h"
10+
#include "terminator.h"
11+
12+
namespace Fortran::runtime {
13+
14+
#ifndef FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
15+
RT_OFFLOAD_VAR_GROUP_BEGIN
16+
RT_VAR_ATTRS AllocatorRegistry allocatorRegistry;
17+
RT_OFFLOAD_VAR_GROUP_END
18+
#endif // FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
19+
20+
RT_OFFLOAD_API_GROUP_BEGIN
21+
// Installs `allocator` in slot `pos` of the registry.
RT_API_ATTRS void AllocatorRegistry::Register(int pos, Allocator_t allocator) {
  // Slot 0 cannot be registered through this entry point: it is reserved for
  // the default allocator, which the struct constructor sets up.
  // NOTE(review): the predicate is kept verbatim in case the check macro
  // reports its text on failure -- confirm against terminator.h.
  INTERNAL_CHECK(pos > 0 && pos < MAX_ALLOCATOR);
  Allocator_t &slot{allocators[pos]};
  slot = allocator;
}
27+
28+
// Returns the allocation function stored in slot `pos`. Both the slot index
// and the stored function pointer are validated with INTERNAL_CHECK.
RT_API_ATTRS AllocFct AllocatorRegistry::GetAllocator(int pos) {
  INTERNAL_CHECK(pos >= 0 && pos < MAX_ALLOCATOR);
  const Allocator_t &entry{allocators[pos]};
  AllocFct f{entry.alloc};
  // A null pointer here means that nothing was registered in this slot.
  INTERNAL_CHECK(f != nullptr);
  return f;
}
34+
35+
// Returns the deallocation function stored in slot `pos`. Both the slot index
// and the stored function pointer are validated with INTERNAL_CHECK.
RT_API_ATTRS FreeFct AllocatorRegistry::GetDeallocator(int pos) {
  INTERNAL_CHECK(pos >= 0 && pos < MAX_ALLOCATOR);
  const Allocator_t &entry{allocators[pos]};
  FreeFct f{entry.free};
  // A null pointer here means that nothing was registered in this slot.
  INTERNAL_CHECK(f != nullptr);
  return f;
}
41+
RT_OFFLOAD_API_GROUP_END
42+
} // namespace Fortran::runtime

flang/runtime/allocator-registry.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//===-- runtime/allocator-registry.h ----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef FORTRAN_RUNTIME_ALLOCATOR_H_
10+
#define FORTRAN_RUNTIME_ALLOCATOR_H_
11+
12+
#include "flang/Common/api-attrs.h"
13+
#include <cstdlib>
14+
#include <vector>
15+
16+
#define MAX_ALLOCATOR 5
17+
18+
namespace Fortran::runtime {
19+
20+
// Signature of an allocation function: takes a byte count, returns a pointer.
using AllocFct = void *(*)(std::size_t);
// Signature of the matching deallocation function.
using FreeFct = void (*)(void *);

// An allocate/free function pair. Both pointers are null until they are set.
struct Allocator_t {
  AllocFct alloc{nullptr};
  FreeFct free{nullptr};
};
27+
28+
#ifdef RT_DEVICE_COMPILATION
// Forwarding functions around std::malloc and std::free, only compiled when
// RT_DEVICE_COMPILATION is defined.
// NOTE(review): presumably these exist so that the default allocator entry
// carries RT_API_ATTRS in device builds -- confirm.
static RT_API_ATTRS void *MallocWrapper(std::size_t size) {
  void *block{std::malloc(size)};
  return block;
}
static RT_API_ATTRS void FreeWrapper(void *p) { std::free(p); }
#endif
34+
35+
struct AllocatorRegistry {
36+
#ifdef RT_DEVICE_COMPILATION
37+
RT_API_ATTRS constexpr AllocatorRegistry()
38+
: allocators{{&MallocWrapper, &FreeWrapper}} {}
39+
#else
40+
constexpr AllocatorRegistry() { allocators[0] = {&std::malloc, &std::free}; };
41+
#endif
42+
RT_API_ATTRS void Register(int, Allocator_t);
43+
RT_API_ATTRS AllocFct GetAllocator(int pos);
44+
RT_API_ATTRS FreeFct GetDeallocator(int pos);
45+
46+
Allocator_t allocators[MAX_ALLOCATOR];
47+
};
48+
49+
RT_OFFLOAD_VAR_GROUP_BEGIN
50+
extern RT_VAR_ATTRS AllocatorRegistry allocatorRegistry;
51+
RT_OFFLOAD_VAR_GROUP_END
52+
53+
} // namespace Fortran::runtime
54+
55+
#endif // FORTRAN_RUNTIME_ALLOCATOR_H_

flang/runtime/descriptor.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "flang/Runtime/descriptor.h"
1010
#include "ISO_Fortran_util.h"
11+
#include "allocator-registry.h"
1112
#include "derived.h"
1213
#include "memory.h"
1314
#include "stat.h"
@@ -50,7 +51,9 @@ RT_API_ATTRS void Descriptor::Establish(TypeCode t, std::size_t elementBytes,
5051
GetDimension(j).SetByteStride(0);
5152
}
5253
}
53-
raw_.f18Addendum = addendum;
54+
if (addendum) {
55+
SetHasAddendum();
56+
}
5457
DescriptorAddendum *a{Addendum()};
5558
RUNTIME_CHECK(terminator, addendum == (a != nullptr));
5659
if (a) {
@@ -162,7 +165,9 @@ RT_API_ATTRS int Descriptor::Allocate() {
162165
// Zero size allocation is possible in Fortran and the resulting
163166
// descriptor must be allocated/associated. Since std::malloc(0)
164167
// result is implementation defined, always allocate at least one byte.
165-
void *p{byteSize ? std::malloc(byteSize) : std::malloc(1)};
168+
169+
AllocFct alloc{allocatorRegistry.GetAllocator(GetAllocIdx())};
170+
void *p{alloc(byteSize ? byteSize : 1)};
166171
if (!p) {
167172
return CFI_ERROR_MEM_ALLOCATION;
168173
}
@@ -204,7 +209,8 @@ RT_API_ATTRS int Descriptor::Deallocate() {
204209
if (!descriptor.base_addr) {
205210
return CFI_ERROR_BASE_ADDR_NULL;
206211
} else {
207-
std::free(descriptor.base_addr);
212+
FreeFct free{allocatorRegistry.GetDeallocator(GetAllocIdx())};
213+
free(descriptor.base_addr);
208214
descriptor.base_addr = nullptr;
209215
return CFI_SUCCESS;
210216
}
@@ -290,7 +296,9 @@ void Descriptor::Dump(FILE *f) const {
290296
std::fprintf(f, " rank %d\n", static_cast<int>(raw_.rank));
291297
std::fprintf(f, " type %d\n", static_cast<int>(raw_.type));
292298
std::fprintf(f, " attribute %d\n", static_cast<int>(raw_.attribute));
293-
std::fprintf(f, " addendum %d\n", static_cast<int>(raw_.f18Addendum));
299+
std::fprintf(f, " extra %d\n", static_cast<int>(raw_.extra));
300+
std::fprintf(f, " addendum %d\n", static_cast<int>(HasAddendum()));
301+
std::fprintf(f, " alloc_idx %d\n", static_cast<int>(GetAllocIdx()));
294302
for (int j{0}; j < raw_.rank; ++j) {
295303
std::fprintf(f, " dim[%d] lower_bound %jd\n", j,
296304
static_cast<std::intmax_t>(raw_.dim[j].lower_bound));

0 commit comments

Comments
 (0)