Skip to content

Commit 5ebe22a

Browse files
authored
[flang][cuda] Add async id to allocators (#134724)
Add async id to allocators in preparation for stream allocation.
1 parent 7117dea commit 5ebe22a

File tree

6 files changed: 23 additions and 16 deletions.

flang-rt/include/flang-rt/runtime/allocator-registry.h

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,25 +11,27 @@
1111

1212
#include "flang/Common/api-attrs.h"
1313
#include "flang/Runtime/allocator-registry-consts.h"
14+
#include <cstdint>
1415
#include <cstdlib>
1516
#include <vector>
1617

1718
#define MAX_ALLOCATOR 7 // 3 bits are reserved in the descriptor.
1819

1920
namespace Fortran::runtime {
2021

21-
using AllocFct = void *(*)(std::size_t);
22+
using AllocFct = void *(*)(std::size_t, std::int64_t);
2223
using FreeFct = void (*)(void *);
2324

2425
typedef struct Allocator_t {
2526
AllocFct alloc{nullptr};
2627
FreeFct free{nullptr};
2728
} Allocator_t;
2829

29-
#ifdef RT_DEVICE_COMPILATION
30-
static RT_API_ATTRS void *MallocWrapper(std::size_t size) {
30+
static RT_API_ATTRS void *MallocWrapper(
31+
std::size_t size, [[maybe_unused]] std::int64_t) {
3132
return std::malloc(size);
3233
}
34+
#ifdef RT_DEVICE_COMPILATION
3335
static RT_API_ATTRS void FreeWrapper(void *p) { return std::free(p); }
3436
#endif
3537

@@ -39,7 +41,7 @@ struct AllocatorRegistry {
3941
: allocators{{&MallocWrapper, &FreeWrapper}} {}
4042
#else
4143
constexpr AllocatorRegistry() {
42-
allocators[kDefaultAllocator] = {&std::malloc, &std::free};
44+
allocators[kDefaultAllocator] = {&MallocWrapper, &std::free};
4345
};
4446
#endif
4547
RT_API_ATTRS void Register(int, Allocator_t);

flang-rt/lib/cuda/allocator.cpp

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -34,15 +34,17 @@ void RTDEF(CUFRegisterAllocator)() {
3434
}
3535
}
3636

37-
void *CUFAllocPinned(std::size_t sizeInBytes) {
37+
void *CUFAllocPinned(
38+
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
3839
void *p;
3940
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
4041
return p;
4142
}
4243

4344
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
4445

45-
void *CUFAllocDevice(std::size_t sizeInBytes) {
46+
void *CUFAllocDevice(
47+
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
4648
void *p;
4749
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
4850
CUDA_REPORT_IF_ERROR(
@@ -55,7 +57,8 @@ void *CUFAllocDevice(std::size_t sizeInBytes) {
5557

5658
void CUFFreeDevice(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
5759

58-
void *CUFAllocManaged(std::size_t sizeInBytes) {
60+
void *CUFAllocManaged(
61+
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
5962
void *p;
6063
CUDA_REPORT_IF_ERROR(
6164
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
@@ -64,9 +67,10 @@ void *CUFAllocManaged(std::size_t sizeInBytes) {
6467

6568
void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
6669

67-
void *CUFAllocUnified(std::size_t sizeInBytes) {
70+
void *CUFAllocUnified(
71+
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
6872
// Call alloc managed for the time being.
69-
return CUFAllocManaged(sizeInBytes);
73+
return CUFAllocManaged(sizeInBytes, asyncId);
7074
}
7175

7276
void CUFFreeUnified(void *p) {

flang-rt/lib/cuda/descriptor.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@ RT_EXT_API_GROUP_BEGIN
2020

2121
Descriptor *RTDEF(CUFAllocDescriptor)(
2222
std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
23-
return reinterpret_cast<Descriptor *>(CUFAllocManaged(sizeInBytes));
23+
return reinterpret_cast<Descriptor *>(
24+
CUFAllocManaged(sizeInBytes, /*asyncId*/ -1));
2425
}
2526

2627
void RTDEF(CUFFreeDescriptor)(

flang-rt/lib/runtime/descriptor.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate() {
170170
// Zero size allocation is possible in Fortran and the resulting
171171
// descriptor must be allocated/associated. Since std::malloc(0)
172172
// result is implementation defined, always allocate at least one byte.
173-
void *p{alloc(byteSize ? byteSize : 1)};
173+
void *p{alloc(byteSize ? byteSize : 1, /*asyncId=*/-1)};
174174
if (!p) {
175175
return CFI_ERROR_MEM_ALLOCATION;
176176
}

flang-rt/lib/runtime/pointer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ RT_API_ATTRS void *AllocateValidatedPointerPayload(
129129
byteSize = ((byteSize + align - 1) / align) * align;
130130
std::size_t total{byteSize + sizeof(std::uintptr_t)};
131131
AllocFct alloc{allocatorRegistry.GetAllocator(allocatorIdx)};
132-
void *p{alloc(total)};
132+
void *p{alloc(total, /*asyncId=*/-1)};
133133
if (p && allocatorIdx == 0) {
134134
// Fill the footer word with the XOR of the ones' complement of
135135
// the base address, which is a value that would be highly unlikely

flang/include/flang/Runtime/CUDA/allocator.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,16 +20,16 @@ extern "C" {
2020
void RTDECL(CUFRegisterAllocator)();
2121
}
2222

23-
void *CUFAllocPinned(std::size_t);
23+
void *CUFAllocPinned(std::size_t, std::int64_t);
2424
void CUFFreePinned(void *);
2525

26-
void *CUFAllocDevice(std::size_t);
26+
void *CUFAllocDevice(std::size_t, std::int64_t);
2727
void CUFFreeDevice(void *);
2828

29-
void *CUFAllocManaged(std::size_t);
29+
void *CUFAllocManaged(std::size_t, std::int64_t);
3030
void CUFFreeManaged(void *);
3131

32-
void *CUFAllocUnified(std::size_t);
32+
void *CUFAllocUnified(std::size_t, std::int64_t);
3333
void CUFFreeUnified(void *);
3434

3535
} // namespace Fortran::runtime::cuda

0 commit comments

Comments
 (0)