Skip to content

Commit fa627d9

Browse files
authored
[flang][cuda] Add entry point for alloc/free and simple copy (#109867)
These will be used to translate simple cuf.alloc/cuf.free and cuf.data_transfer on scalar and constant size arrays.
1 parent 4ca4460 commit fa627d9

File tree

5 files changed

+103
-0
lines changed

5 files changed

+103
-0
lines changed

flang/include/flang/Runtime/CUDA/common.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@
1212
#include "flang/Runtime/descriptor.h"
1313
#include "flang/Runtime/entry-names.h"
1414

15+
/// Type of memory for allocation/deallocation
16+
static constexpr unsigned kMemTypeDevice = 0;
17+
static constexpr unsigned kMemTypeManaged = 1;
18+
static constexpr unsigned kMemTypeUnified = 2;
19+
static constexpr unsigned kMemTypePinned = 3;
20+
21+
/// Data transfer kinds.
1522
static constexpr unsigned kHostToDevice = 0;
1623
static constexpr unsigned kDeviceToHost = 1;
1724
static constexpr unsigned kDeviceToDevice = 2;

flang/include/flang/Runtime/CUDA/memory.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,24 @@ namespace Fortran::runtime::cuda {
1717

1818
extern "C" {
1919

20+
/// Allocate memory on the device.
21+
void *RTDECL(CUFMemAlloc)(std::size_t bytes, unsigned type,
22+
const char *sourceFile = nullptr, int sourceLine = 0);
23+
24+
/// Free memory allocated on the device.
25+
void RTDECL(CUFMemFree)(void *devicePtr, unsigned type,
26+
const char *sourceFile = nullptr, int sourceLine = 0);
27+
2028
/// Set value to the data hold by a descriptor. The \p value pointer must be
2129
/// addressable to the same amount of bytes specified by the element size of
2230
/// the descriptor \p desc.
2331
void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
2432
const char *sourceFile = nullptr, int sourceLine = 0);
2533

34+
/// Data transfer from a pointer to a pointer.
35+
void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
36+
unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
37+
2638
/// Data transfer from a pointer to a descriptor.
2739
void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src,
2840
std::size_t bytes, unsigned mode, const char *sourceFile = nullptr,

flang/runtime/CUDA/memory.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,71 @@
88

99
#include "flang/Runtime/CUDA/memory.h"
1010
#include "../terminator.h"
11+
#include "flang/Runtime/CUDA/common.h"
1112

1213
#include "cuda_runtime.h"
1314

1415
namespace Fortran::runtime::cuda {
1516
extern "C" {
1617

18+
void *RTDEF(CUFMemAlloc)(
19+
std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) {
20+
void *ptr = nullptr;
21+
if (bytes != 0) {
22+
if (type == kMemTypeDevice) {
23+
CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
24+
} else if (type == kMemTypeManaged || type == kMemTypeUnified) {
25+
CUDA_REPORT_IF_ERROR(
26+
cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
27+
} else if (type == kMemTypePinned) {
28+
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes));
29+
} else {
30+
Terminator terminator{sourceFile, sourceLine};
31+
terminator.Crash("unsupported memory type");
32+
}
33+
}
34+
return ptr;
35+
}
36+
37+
void RTDEF(CUFMemFree)(
38+
void *ptr, unsigned type, const char *sourceFile, int sourceLine) {
39+
if (!ptr)
40+
return;
41+
if (type == kMemTypeDevice || type == kMemTypeManaged ||
42+
type == kMemTypeUnified) {
43+
CUDA_REPORT_IF_ERROR(cudaFree(ptr));
44+
} else if (type == kMemTypePinned) {
45+
CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr));
46+
} else {
47+
Terminator terminator{sourceFile, sourceLine};
48+
terminator.Crash("unsupported memory type");
49+
}
50+
}
51+
1752
void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
1853
const char *sourceFile, int sourceLine) {
1954
Terminator terminator{sourceFile, sourceLine};
2055
terminator.Crash("not yet implemented: CUDA data transfer from a scalar "
2156
"value to a descriptor");
2257
}
2358

59+
void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
60+
unsigned mode, const char *sourceFile, int sourceLine) {
61+
cudaMemcpyKind kind;
62+
if (mode == kHostToDevice) {
63+
kind = cudaMemcpyHostToDevice;
64+
} else if (mode == kDeviceToHost) {
65+
kind = cudaMemcpyDeviceToHost;
66+
} else if (mode == kDeviceToDevice) {
67+
kind = cudaMemcpyDeviceToDevice;
68+
} else {
69+
Terminator terminator{sourceFile, sourceLine};
70+
terminator.Crash("host to host copy not supported");
71+
}
72+
// TODO: Use cudaMemcpyAsync when we have support for stream.
73+
CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind));
74+
}
75+
2476
void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr,
2577
std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) {
2678
Terminator terminator{sourceFile, sourceLine};

flang/unittests/Runtime/CUDA/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ if (FLANG_CUF_RUNTIME)
33
add_flang_unittest(FlangCufRuntimeTests
44
Allocatable.cpp
55
AllocatorCUF.cpp
6+
Memory.cpp
67
)
78

89
if (BUILD_SHARED_LIBS)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//===-- flang/unittests/Runtime/Memory.cpp -----------------------*- C++-*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "flang/Runtime/CUDA/memory.h"
10+
#include "gtest/gtest.h"
11+
#include "../../../runtime/terminator.h"
12+
#include "flang/Common/Fortran.h"
13+
#include "flang/Runtime/CUDA/common.h"
14+
15+
#include "cuda_runtime.h"
16+
17+
using namespace Fortran::runtime::cuda;
18+
19+
TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
20+
int *dev = (int *)RTNAME(CUFMemAlloc)(
21+
sizeof(int), kMemTypeDevice, __FILE__, __LINE__);
22+
EXPECT_TRUE(dev != 0);
23+
int host = 42;
24+
RTNAME(CUFDataTransferPtrPtr)
25+
((void *)dev, (void *)&host, sizeof(int), kHostToDevice, __FILE__, __LINE__);
26+
host = 0;
27+
RTNAME(CUFDataTransferPtrPtr)
28+
((void *)&host, (void *)dev, sizeof(int), kDeviceToHost, __FILE__, __LINE__);
29+
EXPECT_EQ(42, host);
30+
RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
31+
}

0 commit comments

Comments
 (0)