Skip to content

Commit 9c54512

Browse files
authored
[flang][cuda] Allocate the dst descriptor in data transfer (llvm#143437)
In a test like:

```
integer, allocatable, device :: da(:)
allocate(a(200))
a = 2
da = a ! da is not allocated before the data transfer is initiated. Allocate it using a as the mold.
```

The reference compiler allocates the data for the `da` descriptor so that the data transfer can be done properly.
1 parent 8957e64 commit 9c54512

File tree

2 files changed

+31
-0
lines changed

2 files changed

+31
-0
lines changed

flang-rt/lib/cuda/memory.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,11 @@ void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
105105
} else {
106106
terminator.Crash("host to host copy not supported");
107107
}
108+
// Allocate dst descriptor if not allocated.
109+
if (!dstDesc->IsAllocated()) {
110+
dstDesc->ApplyMold(*srcDesc, dstDesc->rank());
111+
dstDesc->Allocate(/*asyncObject=*/nullptr);
112+
}
108113
if ((srcDesc->rank() > 0) && (dstDesc->Elements() < srcDesc->Elements())) {
109114
// Special case when rhs is bigger than lhs and both are contiguous arrays.
110115
// In this case we do a simple ptr to ptr transfer with the size of lhs.

flang-rt/unittests/Runtime/CUDA/Memory.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,29 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
7070
EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
7171
}
7272
}
73+
74+
TEST(MemoryCUFTest, CUFDataTransferDescDescDstNotAllocated) {
75+
using Fortran::common::TypeCategory;
76+
RTNAME(CUFRegisterAllocator)();
77+
// INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
78+
auto dev{createAllocatable(TypeCategory::Integer, 4)};
79+
dev->SetAllocIdx(kDeviceAllocatorPos);
80+
EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
81+
EXPECT_FALSE(dev->IsAllocated());
82+
83+
// Create temp array to transfer to device.
84+
auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
85+
std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
86+
RTNAME(CUFDataTransferDescDesc)
87+
(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
88+
89+
// Retrieve data from device.
90+
auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
91+
std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
92+
RTNAME(CUFDataTransferDescDesc)
93+
(host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);
94+
95+
for (unsigned i = 0; i < 10; ++i) {
96+
EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
97+
}
98+
}

0 commit comments

Comments
 (0)