-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][cuda] Add entry point for alloc/free and simple copy #109867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-runtime Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesThese will be used to translate simple cuf.alloc/cuf.free and cuf.data_transfer on scalar and constant size arrays. Full diff: https://github.com/llvm/llvm-project/pull/109867.diff 4 Files Affected:
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
index 33947248dc4831..2fc28ed1567b89 100644
--- a/flang/include/flang/Runtime/CUDA/memory.h
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -17,12 +17,24 @@ namespace Fortran::runtime::cuda {
extern "C" {
+/// Allocate memory on the device.
+void *RTDECL(CUFMemAlloc)(
+ std::size_t bytes, const char *sourceFile = nullptr, int sourceLine = 0);
+
+/// Free memory allocated on the device.
+void RTDECL(CUFMemFree)(
+ void *devicePtr, const char *sourceFile = nullptr, int sourceLine = 0);
+
/// Set value to the data hold by a descriptor. The \p value pointer must be
/// addressable to the same amount of bytes specified by the element size of
/// the descriptor \p desc.
void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
const char *sourceFile = nullptr, int sourceLine = 0);
+/// Data transfer from a pointer to a pointer.
+void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
+ unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
+
/// Data transfer from a pointer to a descriptor.
void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src,
std::size_t bytes, unsigned mode, const char *sourceFile = nullptr,
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index a287fa14a48789..171203a93be2a9 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -8,12 +8,25 @@
#include "flang/Runtime/CUDA/memory.h"
#include "../terminator.h"
+#include "flang/Runtime/CUDA/common.h"
#include "cuda_runtime.h"
namespace Fortran::runtime::cuda {
extern "C" {
+void *RTDEF(CUFMemAlloc)(
+ std::size_t bytes, const char *sourceFile, int sourceLine) {
+ void *ptr;
+ if (bytes != 0)
+ CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
+ return ptr;
+}
+
+void RTDEF(CUFMemFree)(void *ptr, const char *sourceFile, int sourceLine) {
+ CUDA_REPORT_IF_ERROR(cudaFree(ptr));
+}
+
void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
@@ -21,6 +34,22 @@ void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
"value to a descriptor");
}
+void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
+ unsigned mode, const char *sourceFile, int sourceLine) {
+ cudaMemcpyKind kind;
+ if (mode == kHostToDevice) {
+ kind = cudaMemcpyHostToDevice;
+ } else if (mode == kDeviceToHost) {
+ kind = cudaMemcpyDeviceToHost;
+ } else if (mode == kDeviceToDevice) {
+ kind = cudaMemcpyDeviceToDevice;
+ } else {
+ Terminator terminator{sourceFile, sourceLine};
+ terminator.Crash("host to host copy not supported");
+ }
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind));
+}
+
void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr,
std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
index 30fb8c220233c0..a7fe604d687bdd 100644
--- a/flang/unittests/Runtime/CUDA/CMakeLists.txt
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -3,6 +3,7 @@ if (FLANG_CUF_RUNTIME)
add_flang_unittest(FlangCufRuntimeTests
Allocatable.cpp
AllocatorCUF.cpp
+ Memory.cpp
)
if (BUILD_SHARED_LIBS)
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
new file mode 100644
index 00000000000000..95ae6c75e3eeeb
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -0,0 +1,30 @@
+//===-- flang/unittests/Runtime/Memory.cpp -----------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/memory.h"
+#include "gtest/gtest.h"
+#include "../../../runtime/terminator.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/common.h"
+
+#include "cuda_runtime.h"
+
+using namespace Fortran::runtime::cuda;
+
+TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
+ int *dev = (int *)RTNAME(CUFMemAlloc)(sizeof(int), __FILE__, __LINE__);
+ EXPECT_TRUE(dev != 0);
+ int host = 42;
+ RTNAME(CUFDataTransferPtrPtr)((void *)dev, (void *)&host, sizeof(int),
+ kHostToDevice, __FILE__, __LINE__);
+ host = 0;
+ RTNAME(CUFDataTransferPtrPtr)((void *)&host, (void *)dev, sizeof(int),
+ kDeviceToHost, __FILE__, __LINE__);
+ EXPECT_EQ(42, host);
+ RTNAME(CUFMemFree)((void *)dev, __FILE__, __LINE__);
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
Thanks for the review Slava! |
These will be used to translate simple cuf.alloc/cuf.free and cuf.data_transfer on scalar and constant size arrays.