-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][cuda] Data transfer with descriptor #114302
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][cuda] Data transfer with descriptor #114302
Conversation
@llvm/pr-subscribers-flang-runtime Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesUse the feature added in #114301 to perform data transfer between data having a descriptor. Full diff: https://github.com/llvm/llvm-project/pull/114302.diff 2 Files Affected:
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index 4778a4ae77683f..f25d3b531c84f0 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -9,10 +9,32 @@
#include "flang/Runtime/CUDA/memory.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/assign.h"
#include "cuda_runtime.h"
namespace Fortran::runtime::cuda {
+static void *MemmoveHostToDevice(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+ return dst;
+}
+
+static void *MemmoveDeviceToHost(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
+ return dst;
+}
+
+static void *MemmoveDeviceToDevice(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+ return dst;
+}
+
extern "C" {
void *RTDEF(CUFMemAlloc)(
@@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
unsigned mode, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
- terminator.Crash(
- "not yet implemented: CUDA data transfer between two descriptors");
+ MemmoveFct memmoveFct;
+ if (mode == kHostToDevice) {
+ memmoveFct = &MemmoveHostToDevice;
+ } else if (mode == kDeviceToHost) {
+ memmoveFct = &MemmoveDeviceToHost;
+ } else if (mode == kDeviceToDevice) {
+ memmoveFct = &MemmoveDeviceToDevice;
+ }
+ Fortran::runtime::Assign(
+ dstDesc, srcDesc, terminator, MaybeReallocate, memmoveFct);
}
}
} // namespace Fortran::runtime::cuda
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 157d3cdb531def..ade05e21b70a89 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -9,11 +9,17 @@
#include "flang/Runtime/CUDA/memory.h"
#include "gtest/gtest.h"
#include "../../../runtime/terminator.h"
+#include "../tools.h"
#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
#include "cuda_runtime.h"
+using namespace Fortran::runtime;
using namespace Fortran::runtime::cuda;
TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
@@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
EXPECT_EQ(42, host);
RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
}
+
+static OwningPtr<Descriptor> createAllocatable(
+ Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+ return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+ CFI_attribute_allocatable);
+}
+
+TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
+ using Fortran::common::TypeCategory;
+ RTNAME(CUFRegisterAllocator)();
+ // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+ auto dev{createAllocatable(TypeCategory::Integer, 4)};
+ dev->SetAllocIdx(kDeviceAllocatorPos);
+ EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+ RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
+ RTNAME(AllocatableAllocate)
+ (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(dev->IsAllocated());
+
+ // Create temp array to transfer to device.
+ auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+ RTNAME(CUFDataTransferDescDesc)(*dev, *x, kHostToDevice, __FILE__, __LINE__);
+
+ // Retrieve data from device.
+ auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+ RTNAME(CUFDataTransferDescDesc)(
+ *host, *dev, kDeviceToHost, __FILE__, __LINE__);
+
+ for (unsigned i = 0; i < 10; ++i) {
+ EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+ }
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good. Nice way of testing the runtime.
Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302
Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302
Use the feature added in #114301 to perform data transfer between data having a descriptor.