Skip to content

Commit 56b7923

Browse files
authored
[flang][cuda] Use the asyncId in device allocation (#135099)
Use `cudaMallocAsync` in the `CUFAllocDevice` allocator when asyncId is provided. More work is needed to be able to call `cudaFreeAsync` since the allocated address and stream need to be tracked.
1 parent 6a63abc commit 56b7923

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

flang-rt/lib/cuda/allocator.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "flang/Runtime/CUDA/allocator.h"
1010
#include "flang-rt/runtime/allocator-registry.h"
1111
#include "flang-rt/runtime/derived.h"
12+
#include "flang-rt/runtime/descriptor.h"
1213
#include "flang-rt/runtime/environment.h"
1314
#include "flang-rt/runtime/stat.h"
1415
#include "flang-rt/runtime/terminator.h"
@@ -43,14 +44,18 @@ void *CUFAllocPinned(
4344

4445
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
4546

46-
void *CUFAllocDevice(
47-
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
47+
void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) {
4848
void *p;
4949
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
5050
CUDA_REPORT_IF_ERROR(
5151
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
5252
} else {
53-
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
53+
if (asyncId == kNoAsyncId) {
54+
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
55+
} else {
56+
CUDA_REPORT_IF_ERROR(
57+
cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId));
58+
}
5459
}
5560
return p;
5661
}

0 commit comments

Comments
 (0)