Skip to content

[flang][cuda] Data transfer with descriptor #114302

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

clementval
Copy link
Contributor

Use the feature added in #114301 to perform data transfer between data having a descriptor.

@llvmbot llvmbot added flang:runtime flang Flang issues not falling into any other category labels Oct 30, 2024
@llvmbot
Copy link
Member

llvmbot commented Oct 30, 2024

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Use the feature added in #114301 to perform data transfer between data having a descriptor.


Full diff: https://github.com/llvm/llvm-project/pull/114302.diff

2 Files Affected:

  • (modified) flang/runtime/CUDA/memory.cpp (+32-2)
  • (modified) flang/unittests/Runtime/CUDA/Memory.cpp (+40)
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index 4778a4ae77683f..f25d3b531c84f0 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -9,10 +9,32 @@
 #include "flang/Runtime/CUDA/memory.h"
 #include "../terminator.h"
 #include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/assign.h"
 
 #include "cuda_runtime.h"
 
 namespace Fortran::runtime::cuda {
+static void *MemmoveHostToDevice(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+  return dst;
+}
+
+static void *MemmoveDeviceToHost(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
+  return dst;
+}
+
+static void *MemmoveDeviceToDevice(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+  return dst;
+}
+
 extern "C" {
 
 void *RTDEF(CUFMemAlloc)(
@@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
 void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
     unsigned mode, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
-  terminator.Crash(
-      "not yet implemented: CUDA data transfer between two descriptors");
+  MemmoveFct memmoveFct;
+  if (mode == kHostToDevice) {
+    memmoveFct = &MemmoveHostToDevice;
+  } else if (mode == kDeviceToHost) {
+    memmoveFct = &MemmoveDeviceToHost;
+  } else if (mode == kDeviceToDevice) {
+    memmoveFct = &MemmoveDeviceToDevice;
+  }
+  Fortran::runtime::Assign(
+      dstDesc, srcDesc, terminator, MaybeReallocate, memmoveFct);
 }
 }
 } // namespace Fortran::runtime::cuda
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 157d3cdb531def..ade05e21b70a89 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -9,11 +9,17 @@
 #include "flang/Runtime/CUDA/memory.h"
 #include "gtest/gtest.h"
 #include "../../../runtime/terminator.h"
+#include "../tools.h"
 #include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
 #include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
 
 #include "cuda_runtime.h"
 
+using namespace Fortran::runtime;
 using namespace Fortran::runtime::cuda;
 
 TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
@@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
   EXPECT_EQ(42, host);
   RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
 }
+
+static OwningPtr<Descriptor> createAllocatable(
+    Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+  return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+      CFI_attribute_allocatable);
+}
+
+TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
+  using Fortran::common::TypeCategory;
+  RTNAME(CUFRegisterAllocator)();
+  // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+  auto dev{createAllocatable(TypeCategory::Integer, 4)};
+  dev->SetAllocIdx(kDeviceAllocatorPos);
+  EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+  RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
+  RTNAME(AllocatableAllocate)
+  (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_TRUE(dev->IsAllocated());
+
+  // Create temp array to transfer to device.
+  auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+      std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+  RTNAME(CUFDataTransferDescDesc)(*dev, *x, kHostToDevice, __FILE__, __LINE__);
+
+  // Retrieve data from device.
+  auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+      std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+  RTNAME(CUFDataTransferDescDesc)(
+      *host, *dev, kDeviceToHost, __FILE__, __LINE__);
+
+  for (unsigned i = 0; i < 10; ++i) {
+    EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+  }
+}

Copy link

github-actions bot commented Oct 30, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@Renaud-K Renaud-K self-requested a review October 30, 2024 23:02
Copy link
Contributor

@Renaud-K Renaud-K left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good. Nice way of testing the runtime.

@clementval clementval deleted the branch llvm:users/clementval/cuf_data_transfer_desc1 November 1, 2024 17:34
@clementval clementval closed this Nov 1, 2024
clementval added a commit that referenced this pull request Nov 1, 2024
Reopen PR #114302 as it was automatically closed. 

Review in #114302
smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024
Reopen PR llvm#114302 as it was automatically closed. 

Review in llvm#114302
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
Reopen PR llvm#114302 as it was automatically closed. 

Review in llvm#114302
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:runtime flang Flang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants