Skip to content

Commit 0b31f08

Browse files
authored
[flang][cuda] Add support for NV_CUDAFOR_DEVICE_IS_MANAGED (#133778)
Add support for the environment variable `NV_CUDAFOR_DEVICE_IS_MANAGED` as described in the documentation: https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/index.html#controlling-device-data-is-managed. This mainly switch device allocation to managed allocation.
1 parent b739a3c commit 0b31f08

File tree

4 files changed

+28
-2
lines changed

4 files changed

+28
-2
lines changed

flang-rt/include/flang-rt/runtime/environment.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct ExecutionEnvironment {
5959

6060
// CUDA related variables
6161
std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
62+
bool cudaDeviceIsManaged{false}; // NV_CUDAFOR_DEVICE_IS_MANAGED
6263
};
6364

6465
RT_OFFLOAD_VAR_GROUP_BEGIN

flang-rt/lib/cuda/allocator.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "flang/Runtime/CUDA/allocator.h"
1010
#include "flang-rt/runtime/allocator-registry.h"
1111
#include "flang-rt/runtime/derived.h"
12+
#include "flang-rt/runtime/environment.h"
1213
#include "flang-rt/runtime/stat.h"
1314
#include "flang-rt/runtime/terminator.h"
1415
#include "flang-rt/runtime/type-info.h"
@@ -43,7 +44,12 @@ void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
4344

4445
void *CUFAllocDevice(std::size_t sizeInBytes) {
4546
void *p;
46-
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
47+
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
48+
CUDA_REPORT_IF_ERROR(
49+
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
50+
} else {
51+
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
52+
}
4753
return p;
4854
}
4955

flang-rt/lib/cuda/memory.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "flang/Runtime/CUDA/memory.h"
1010
#include "flang-rt/runtime/assign-impl.h"
1111
#include "flang-rt/runtime/descriptor.h"
12+
#include "flang-rt/runtime/environment.h"
1213
#include "flang-rt/runtime/terminator.h"
1314
#include "flang/Runtime/CUDA/common.h"
1415
#include "flang/Runtime/CUDA/descriptor.h"
@@ -26,7 +27,12 @@ void *RTDEF(CUFMemAlloc)(
2627
void *ptr = nullptr;
2728
if (bytes != 0) {
2829
if (type == kMemTypeDevice) {
29-
CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
30+
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
31+
CUDA_REPORT_IF_ERROR(
32+
cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
33+
} else {
34+
CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
35+
}
3036
} else if (type == kMemTypeManaged || type == kMemTypeUnified) {
3137
CUDA_REPORT_IF_ERROR(
3238
cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));

flang-rt/lib/runtime/environment.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,19 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
155155
}
156156
}
157157

158+
if (auto *x{std::getenv("NV_CUDAFOR_DEVICE_IS_MANAGED")}) {
159+
char *end;
160+
auto n{std::strtol(x, &end, 10)};
161+
if (n >= 0 && n <= 1 && *end == '\0') {
162+
cudaDeviceIsManaged = n != 0;
163+
} else {
164+
std::fprintf(stderr,
165+
"Fortran runtime: NV_CUDAFOR_DEVICE_IS_MANAGED=%s is invalid; "
166+
"ignored\n",
167+
x);
168+
}
169+
}
170+
158171
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
159172
}
160173

0 commit comments

Comments
 (0)