Skip to content

Commit 3247386

Browse files
authored
[flang][cuda] Data transfer with descriptor (#114598)
Reopen PR #114302 as it was automatically closed. Review in #114302
1 parent 1a18767 commit 3247386

File tree

2 files changed

+72
-2
lines changed

2 files changed

+72
-2
lines changed

flang/runtime/CUDA/memory.cpp

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,32 @@
99
#include "flang/Runtime/CUDA/memory.h"
1010
#include "../terminator.h"
1111
#include "flang/Runtime/CUDA/common.h"
12+
#include "flang/Runtime/assign.h"
1213

1314
#include "cuda_runtime.h"
1415

1516
namespace Fortran::runtime::cuda {
17+
static void *MemmoveHostToDevice(
18+
void *dst, const void *src, std::size_t count) {
19+
// TODO: Use cudaMemcpyAsync when we have support for stream.
20+
CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
21+
return dst;
22+
}
23+
24+
static void *MemmoveDeviceToHost(
25+
void *dst, const void *src, std::size_t count) {
26+
// TODO: Use cudaMemcpyAsync when we have support for stream.
27+
CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
28+
return dst;
29+
}
30+
31+
static void *MemmoveDeviceToDevice(
32+
void *dst, const void *src, std::size_t count) {
33+
// TODO: Use cudaMemcpyAsync when we have support for stream.
34+
CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice));
35+
return dst;
36+
}
37+
1638
extern "C" {
1739

1840
void *RTDEF(CUFMemAlloc)(
@@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
90112
void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
91113
unsigned mode, const char *sourceFile, int sourceLine) {
92114
Terminator terminator{sourceFile, sourceLine};
93-
terminator.Crash(
94-
"not yet implemented: CUDA data transfer between two descriptors");
115+
MemmoveFct memmoveFct;
116+
if (mode == kHostToDevice) {
117+
memmoveFct = &MemmoveHostToDevice;
118+
} else if (mode == kDeviceToHost) {
119+
memmoveFct = &MemmoveDeviceToHost;
120+
} else if (mode == kDeviceToDevice) {
121+
memmoveFct = &MemmoveDeviceToDevice;
122+
}
123+
Fortran::runtime::Assign(
124+
*dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
95125
}
96126
}
97127
} // namespace Fortran::runtime::cuda

flang/unittests/Runtime/CUDA/Memory.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,17 @@
99
#include "flang/Runtime/CUDA/memory.h"
1010
#include "gtest/gtest.h"
1111
#include "../../../runtime/terminator.h"
12+
#include "../tools.h"
1213
#include "flang/Common/Fortran.h"
14+
#include "flang/Runtime/CUDA/allocator.h"
1315
#include "flang/Runtime/CUDA/common.h"
16+
#include "flang/Runtime/CUDA/descriptor.h"
17+
#include "flang/Runtime/allocatable.h"
18+
#include "flang/Runtime/allocator-registry.h"
1419

1520
#include "cuda_runtime.h"
1621

22+
using namespace Fortran::runtime;
1723
using namespace Fortran::runtime::cuda;
1824

1925
TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
@@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
2935
EXPECT_EQ(42, host);
3036
RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
3137
}
38+
39+
static OwningPtr<Descriptor> createAllocatable(
40+
Fortran::common::TypeCategory tc, int kind, int rank = 1) {
41+
return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
42+
CFI_attribute_allocatable);
43+
}
44+
45+
TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
46+
using Fortran::common::TypeCategory;
47+
RTNAME(CUFRegisterAllocator)();
48+
// INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
49+
auto dev{createAllocatable(TypeCategory::Integer, 4)};
50+
dev->SetAllocIdx(kDeviceAllocatorPos);
51+
EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
52+
RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
53+
RTNAME(AllocatableAllocate)
54+
(*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
55+
EXPECT_TRUE(dev->IsAllocated());
56+
57+
// Create temp array to transfer to device.
58+
auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
59+
std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
60+
RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
61+
62+
// Retrieve data from device.
63+
auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
64+
std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
65+
RTNAME(CUFDataTransferDescDesc)
66+
(host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);
67+
68+
for (unsigned i = 0; i < 10; ++i) {
69+
EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
70+
}
71+
}

0 commit comments

Comments
 (0)