Skip to content

[flang][cuda] Add entry point for alloc/free and simple copy #109867

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 25, 2024

Conversation

clementval
Copy link
Contributor

These will be used to translate simple cuf.alloc/cuf.free and cuf.data_transfer on scalar and constant size arrays.

@llvmbot llvmbot added flang:runtime flang Flang issues not falling into any other category labels Sep 24, 2024
@llvmbot
Copy link
Member

llvmbot commented Sep 24, 2024

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

These will be used to translate simple cuf.alloc/cuf.free and cuf.data_transfer on scalar and constant size arrays.


Full diff: https://github.com/llvm/llvm-project/pull/109867.diff

4 Files Affected:

  • (modified) flang/include/flang/Runtime/CUDA/memory.h (+12)
  • (modified) flang/runtime/CUDA/memory.cpp (+29)
  • (modified) flang/unittests/Runtime/CUDA/CMakeLists.txt (+1)
  • (added) flang/unittests/Runtime/CUDA/Memory.cpp (+30)
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
index 33947248dc4831..2fc28ed1567b89 100644
--- a/flang/include/flang/Runtime/CUDA/memory.h
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -17,12 +17,24 @@ namespace Fortran::runtime::cuda {
 
 extern "C" {
 
+/// Allocate memory on the device.
+void *RTDECL(CUFMemAlloc)(
+    std::size_t bytes, const char *sourceFile = nullptr, int sourceLine = 0);
+
+/// Free memory allocated on the device.
+void RTDECL(CUFMemFree)(
+    void *devicePtr, const char *sourceFile = nullptr, int sourceLine = 0);
+
 /// Set value to the data hold by a descriptor. The \p value pointer must be
 /// addressable to the same amount of bytes specified by the element size of
 /// the descriptor \p desc.
 void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
     const char *sourceFile = nullptr, int sourceLine = 0);
 
+/// Data transfer from a pointer to a pointer.
+void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
+    unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
+
 /// Data transfer from a pointer to a descriptor.
 void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src,
     std::size_t bytes, unsigned mode, const char *sourceFile = nullptr,
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index a287fa14a48789..171203a93be2a9 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -8,12 +8,25 @@
 
 #include "flang/Runtime/CUDA/memory.h"
 #include "../terminator.h"
+#include "flang/Runtime/CUDA/common.h"
 
 #include "cuda_runtime.h"
 
 namespace Fortran::runtime::cuda {
 extern "C" {
 
+void *RTDEF(CUFMemAlloc)(
+    std::size_t bytes, const char *sourceFile, int sourceLine) {
+  void *ptr;
+  if (bytes != 0)
+    CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
+  return ptr;
+}
+
+void RTDEF(CUFMemFree)(void *ptr, const char *sourceFile, int sourceLine) {
+  CUDA_REPORT_IF_ERROR(cudaFree(ptr));
+}
+
 void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
     const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
@@ -21,6 +34,22 @@ void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
                    "value to a descriptor");
 }
 
+void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
+    unsigned mode, const char *sourceFile, int sourceLine) {
+  cudaMemcpyKind kind;
+  if (mode == kHostToDevice) {
+    kind = cudaMemcpyHostToDevice;
+  } else if (mode == kDeviceToHost) {
+    kind = cudaMemcpyDeviceToHost;
+  } else if (mode == kDeviceToDevice) {
+    kind = cudaMemcpyDeviceToDevice;
+  } else {
+    Terminator terminator{sourceFile, sourceLine};
+    terminator.Crash("host to host copy not supported");
+  }
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind));
+}
+
 void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr,
     std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
index 30fb8c220233c0..a7fe604d687bdd 100644
--- a/flang/unittests/Runtime/CUDA/CMakeLists.txt
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -3,6 +3,7 @@ if (FLANG_CUF_RUNTIME)
 add_flang_unittest(FlangCufRuntimeTests
   Allocatable.cpp
   AllocatorCUF.cpp
+  Memory.cpp
 )
 
 if (BUILD_SHARED_LIBS)
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
new file mode 100644
index 00000000000000..95ae6c75e3eeeb
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -0,0 +1,30 @@
+//===-- flang/unittests/Runtime/Memory.cpp -----------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/memory.h"
+#include "gtest/gtest.h"
+#include "../../../runtime/terminator.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/common.h"
+
+#include "cuda_runtime.h"
+
+using namespace Fortran::runtime::cuda;
+
+TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
+  int *dev = (int *)RTNAME(CUFMemAlloc)(sizeof(int), __FILE__, __LINE__);
+  EXPECT_TRUE(dev != 0);
+  int host = 42;
+  RTNAME(CUFDataTransferPtrPtr)((void *)dev, (void *)&host, sizeof(int),
+      kHostToDevice, __FILE__, __LINE__);
+  host = 0;
+  RTNAME(CUFDataTransferPtrPtr)((void *)&host, (void *)dev, sizeof(int),
+      kDeviceToHost, __FILE__, __LINE__);
+  EXPECT_EQ(42, host);
+  RTNAME(CUFMemFree)((void *)dev, __FILE__, __LINE__);
+}

Copy link

github-actions bot commented Sep 24, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

Copy link
Contributor

@vzakhari vzakhari left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

@clementval
Copy link
Contributor Author

Thanks!

Thanks for the review Slava!

@clementval clementval merged commit fa627d9 into llvm:main Sep 25, 2024
8 checks passed
@clementval clementval deleted the cuf_rt_alloc_free branch September 25, 2024 03:00
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:runtime flang Flang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants