-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][cuda] Add new entry points function for data transfer #108244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-runtime @llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: Add new entry points for more complex data transfer involving descriptors. These functions will be called when converting `cuf.data_transfer` operations. Full diff: https://github.com/llvm/llvm-project/pull/108244.diff 4 Files Affected:
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
new file mode 100644
index 00000000000000..39fd333152eb8e
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -0,0 +1,45 @@
+//===-- include/flang/Runtime/CUDA/memory.h ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_MEMORY_H_
+#define FORTRAN_RUNTIME_CUDA_MEMORY_H_
+
+#include "flang/Runtime/descriptor.h"
+#include "flang/Runtime/entry-names.h"
+#include <cstddef>
+
+static constexpr unsigned kHostToDevice = 0; // transfer direction code; presumably host -> device, passed as `mode` below (TODO confirm)
+static constexpr unsigned kDeviceToHost = 1; // presumably device -> host (TODO confirm)
+static constexpr unsigned kDeviceToDevice = 2; // presumably device -> device (TODO confirm)
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+
+// Set the data held by a descriptor (`desc`) to a value (`value`). `sourceFile`/`sourceLine` give the caller's source location for runtime error reporting.
+void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void* value,
+ const char *sourceFile = nullptr, int sourceLine = 0);
+
+// Data transfer from a pointer (`src`) to a descriptor (`dst`). `bytes` is presumably the transfer size and `mode` the transfer direction (TODO confirm); `sourceFile`/`sourceLine` give the caller's source location.
+void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src,
+ std::size_t bytes, unsigned mode, const char *sourceFile = nullptr,
+ int sourceLine = 0);
+
+// Data transfer from a descriptor (`src`) to a pointer (`dst`). `bytes` is presumably the transfer size and `mode` the transfer direction (TODO confirm); `sourceFile`/`sourceLine` give the caller's source location.
+void RTDECL(CUFDataTransferPtrDesc)(void *dst, const Descriptor &src,
+ std::size_t bytes, unsigned mode, const char *sourceFile = nullptr,
+ int sourceLine = 0);
+
+// Data transfer from a descriptor (`src`) to a descriptor (`dst`). `mode` is presumably the transfer direction (TODO confirm); `sourceFile`/`sourceLine` give the caller's source location.
+void RTDECL(CUFDataTransferDescDesc)(const Descriptor &dst,
+ const Descriptor &src, unsigned mode, const char *sourceFile = nullptr,
+ int sourceLine = 0);
+
+} // extern "C"
+} // namespace Fortran::runtime::cuda
+#endif // FORTRAN_RUNTIME_CUDA_MEMORY_H_
diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp
index c1debf28d00332..05f644654efe1b 100644
--- a/flang/lib/Optimizer/Dialect/FIRType.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRType.cpp
@@ -1467,4 +1467,4 @@ fir::getTypeSizeAndAlignmentOrCrash(mlir::Location loc, mlir::Type ty,
if (result)
return *result;
TODO(loc, "computing size of a component");
-}
\ No newline at end of file
+}
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 81055b2737c0f1..490bb369b572f6 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -16,6 +16,7 @@ set(CUFRT_LIBNAME CufRuntime_cuda_${CUDAToolkit_VERSION_MAJOR})
add_flang_library(${CUFRT_LIBNAME}
allocator.cpp
descriptor.cpp
+ memory.cpp
)
if (BUILD_SHARED_LIBS)
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
new file mode 100644
index 00000000000000..34eee34762bec9
--- /dev/null
+++ b/flang/runtime/CUDA/memory.cpp
@@ -0,0 +1,49 @@
+//===-- runtime/CUDA/memory.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/memory.h"
+#include "../terminator.h"
+
+#include "cuda_runtime.h"
+
+namespace Fortran::runtime::cuda {
+extern "C" {
+
+// Runtime entry point for setting the data of `desc` to `value`.
+// Not implemented yet: every call ends in Terminator::Crash, reported against
+// the source location (`sourceFile`, `sourceLine`) supplied by the caller.
+void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void* value,
+ const char *sourceFile, int sourceLine) {
+ Terminator{sourceFile, sourceLine}.Crash(
+ "not yet implemented: CUDA data transfer from a scalar value to a descriptor");
+}
+
+// Runtime entry point for a data transfer from the raw pointer `addr` into
+// the descriptor `desc`.
+// Not implemented yet: every call ends in Terminator::Crash, reported against
+// the source location (`sourceFile`, `sourceLine`) supplied by the caller.
+void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr,
+ std::size_t bytes, unsigned mode, const char *sourceFile,
+ int sourceLine) {
+ Terminator{sourceFile, sourceLine}.Crash(
+ "not yet implemented: CUDA data transfer from a pointer to a descriptor");
+}
+
+// Runtime entry point for a data transfer from the descriptor `desc` to the
+// raw pointer `addr`.
+// Not implemented yet: every call ends in Terminator::Crash, reported against
+// the source location (`sourceFile`, `sourceLine`) supplied by the caller.
+void RTDEF(CUFDataTransferPtrDesc)(void* addr, const Descriptor &desc,
+ std::size_t bytes, unsigned mode, const char *sourceFile,
+ int sourceLine) {
+ Terminator{sourceFile, sourceLine}.Crash(
+ "not yet implemented: CUDA data transfer from a descriptor to a pointer");
+}
+
+// Runtime entry point for a data transfer from the descriptor `srcDesc` to
+// the descriptor `dstDesc`.
+// Not implemented yet: every call ends in Terminator::Crash, reported against
+// the source location (`sourceFile`, `sourceLine`) supplied by the caller.
+// This is a definition, so it uses RTDEF (as the other entry points defined
+// in this file do) rather than the declaration macro RTDECL.
+void RTDEF(CUFDataTransferDescDesc)(const Descriptor &dstDesc,
+ const Descriptor &srcDesc, unsigned mode, const char *sourceFile,
+ int sourceLine) {
+ Terminator terminator{sourceFile, sourceLine};
+ terminator.Crash(
+ "not yet implemented: CUDA data transfer between two descriptors");
+}
+
+}
+} // namespace Fortran::runtime::cuda
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
||
// Data transfer from a descriptor to a descriptor. | ||
void RTDECL(CUFDataTransferDescDesc)(const Descriptor &dst, | ||
const Descriptor &src, unsigned mode, const char *sourceFile = nullptr, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be used for entities (described by `src`/`dst` descriptors) of derived types with allocatable/pointer components or is this disallowed in CUF?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean something like
module m
type :: t1
integer, allocatable :: a(:)
end type
end
program p
use m
type(t1) :: h
type(t1), device :: d
h = d
end
This is allowed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it mean this API needs to basically reproduce the `Assign` runtime with regard to handling automatic reallocation of the components or is it supposed to make a shallow copy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. Basically it would be the same as Assign but with cuda memcpy instead of the standard copy. Hopefully we can adapt Assign to be called from here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@vzakhari Do you have any more concerns about these new entry points?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the clarifications, Valentin!
Convert cuf.data_transfer operations involving descriptors to the newly introduced entry points (#108244).
Convert cuf.data_transfer operations involving descriptors to the newly introduced entry points (llvm#108244).
Convert cuf.data_transfer operations involving descriptors to the newly introduced entry points (llvm#108244).
Add new entry points for more complex data transfer involving descriptors. These functions will be called when converting `cuf.data_transfer` operations.