Skip to content

[flang][cuda] Add entry point for alloc/free and simple copy #109867

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions flang/include/flang/Runtime/CUDA/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
#include "flang/Runtime/descriptor.h"
#include "flang/Runtime/entry-names.h"

/// Memory-kind selectors. These are the values compared against the `type`
/// argument of the CUFMemAlloc / CUFMemFree runtime entry points.
static constexpr unsigned kMemTypeDevice{0};
static constexpr unsigned kMemTypeManaged{1};
static constexpr unsigned kMemTypeUnified{2};
static constexpr unsigned kMemTypePinned{3};

/// Data-transfer direction selectors. These are the values compared against
/// the `mode` argument of the CUF data-transfer runtime entry points.
static constexpr unsigned kHostToDevice{0};
static constexpr unsigned kDeviceToHost{1};
static constexpr unsigned kDeviceToDevice{2};
Expand Down
12 changes: 12 additions & 0 deletions flang/include/flang/Runtime/CUDA/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,24 @@ namespace Fortran::runtime::cuda {

extern "C" {

/// Allocate \p bytes bytes of memory of the kind selected by \p type, which is
/// expected to be one of the kMemType* constants (device, managed, unified or
/// pinned). The implementation in memory.cpp returns a null pointer without
/// allocating when \p bytes is zero, and crashes through a Terminator built
/// from \p sourceFile and \p sourceLine when \p type is not recognized.
void *RTDECL(CUFMemAlloc)(std::size_t bytes, unsigned type,
const char *sourceFile = nullptr, int sourceLine = 0);

/// Free memory of the kind selected by \p type (one of the kMemType*
/// constants). The implementation in memory.cpp does nothing when
/// \p devicePtr is null, and crashes through a Terminator built from
/// \p sourceFile and \p sourceLine when \p type is not recognized.
void RTDECL(CUFMemFree)(void *devicePtr, unsigned type,
const char *sourceFile = nullptr, int sourceLine = 0);

/// Set value to the data held by a descriptor. The \p value pointer must be
/// addressable to the same amount of bytes specified by the element size of
/// the descriptor \p desc.
/// NOTE: the implementation in memory.cpp is currently a stub that crashes
/// with a "not yet implemented" message.
void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
const char *sourceFile = nullptr, int sourceLine = 0);

/// Data transfer from a pointer to a pointer: copy \p bytes bytes from \p src
/// to \p dst. \p mode selects the direction and is expected to be one of
/// kHostToDevice, kDeviceToHost or kDeviceToDevice; the implementation in
/// memory.cpp crashes through a Terminator built from \p sourceFile and
/// \p sourceLine for any other value.
void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);

/// Data transfer from a pointer to a descriptor.
void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src,
std::size_t bytes, unsigned mode, const char *sourceFile = nullptr,
Expand Down
52 changes: 52 additions & 0 deletions flang/runtime/CUDA/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,71 @@

#include "flang/Runtime/CUDA/memory.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"

#include "cuda_runtime.h"

namespace Fortran::runtime::cuda {
extern "C" {

/// Allocates \p bytes bytes of memory of the kind named by \p type and returns
/// the resulting pointer. A zero-byte request performs no allocation and
/// yields nullptr. An unrecognized \p type is reported through a Terminator
/// built from \p sourceFile and \p sourceLine.
void *RTDEF(CUFMemAlloc)(
    std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) {
  void *ptr = nullptr;
  // Nothing to allocate: hand back the null pointer untouched.
  if (bytes == 0) {
    return ptr;
  }
  // The expressions passed to CUDA_REPORT_IF_ERROR are kept verbatim in case
  // the macro includes the expression text in its diagnostic.
  switch (type) {
  case kMemTypeDevice:
    CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
    break;
  case kMemTypeManaged:
  case kMemTypeUnified:
    // Managed and unified requests share the same allocation call.
    CUDA_REPORT_IF_ERROR(
        cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
    break;
  case kMemTypePinned:
    CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes));
    break;
  default: {
    Terminator terminator{sourceFile, sourceLine};
    terminator.Crash("unsupported memory type");
  }
  }
  return ptr;
}

/// Releases \p ptr, previously obtained for the memory kind named by \p type.
/// A null \p ptr is ignored before \p type is even inspected. An unrecognized
/// \p type is reported through a Terminator built from \p sourceFile and
/// \p sourceLine.
void RTDEF(CUFMemFree)(
    void *ptr, unsigned type, const char *sourceFile, int sourceLine) {
  if (ptr == nullptr) {
    return;
  }
  switch (type) {
  case kMemTypeDevice:
  case kMemTypeManaged:
  case kMemTypeUnified:
    // Device, managed and unified memory are all released with cudaFree.
    CUDA_REPORT_IF_ERROR(cudaFree(ptr));
    break;
  case kMemTypePinned:
    // Pinned memory is released with the host-side free call.
    CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr));
    break;
  default: {
    Terminator terminator{sourceFile, sourceLine};
    terminator.Crash("unsupported memory type");
  }
  }
}

/// Placeholder for setting the data of \p desc from the scalar at \p value.
/// Not implemented yet: every call crashes with a "not yet implemented"
/// message attributed to \p sourceFile and \p sourceLine.
void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
    const char *sourceFile, int sourceLine) {
  Terminator{sourceFile, sourceLine}.Crash(
      "not yet implemented: CUDA data transfer from a scalar "
      "value to a descriptor");
}

/// Copies \p bytes bytes from \p src to \p dst with a blocking cudaMemcpy.
/// \p mode picks the copy direction (kHostToDevice, kDeviceToHost or
/// kDeviceToDevice). Any other value is reported through a Terminator built
/// from \p sourceFile and \p sourceLine.
void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
    unsigned mode, const char *sourceFile, int sourceLine) {
  cudaMemcpyKind kind;
  // Translate the runtime's direction selector into the CUDA copy kind.
  switch (mode) {
  case kHostToDevice:
    kind = cudaMemcpyHostToDevice;
    break;
  case kDeviceToHost:
    kind = cudaMemcpyDeviceToHost;
    break;
  case kDeviceToDevice:
    kind = cudaMemcpyDeviceToDevice;
    break;
  default: {
    Terminator terminator{sourceFile, sourceLine};
    terminator.Crash("host to host copy not supported");
  }
  }
  // TODO: Use cudaMemcpyAsync when we have support for stream.
  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind));
}

void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr,
std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
Expand Down
1 change: 1 addition & 0 deletions flang/unittests/Runtime/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ if (FLANG_CUF_RUNTIME)
# Source files passed to add_flang_unittest for the FlangCufRuntimeTests
# target; Memory.cpp holds the tests for the CUF memory entry points.
add_flang_unittest(FlangCufRuntimeTests
Allocatable.cpp
AllocatorCUF.cpp
Memory.cpp
)

if (BUILD_SHARED_LIBS)
Expand Down
31 changes: 31 additions & 0 deletions flang/unittests/Runtime/CUDA/Memory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//===-- flang/unittests/Runtime/CUDA/Memory.cpp ------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Runtime/CUDA/memory.h"
#include "gtest/gtest.h"
#include "../../../runtime/terminator.h"
#include "flang/Common/Fortran.h"
#include "flang/Runtime/CUDA/common.h"

#include "cuda_runtime.h"

using namespace Fortran::runtime::cuda;

// Round-trips one int through device memory using the CUF runtime entry
// points: allocate, copy host -> device, copy device -> host, then free.
// NOTE(review): "Tramsfer" in the test name looks like a typo for "Transfer";
// the name is kept unchanged so existing --gtest_filter patterns still match.
TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
  int *dev = static_cast<int *>(RTNAME(CUFMemAlloc)(
      sizeof(int), kMemTypeDevice, __FILE__, __LINE__));
  // Fatal assertion: if the allocation failed, stop here instead of handing a
  // null device pointer to the transfers below.
  ASSERT_NE(dev, nullptr);
  int host = 42;
  RTNAME(CUFDataTransferPtrPtr)(
      dev, &host, sizeof(int), kHostToDevice, __FILE__, __LINE__);
  // Clear the host copy so the expected value can only come from the device.
  host = 0;
  RTNAME(CUFDataTransferPtrPtr)(
      &host, dev, sizeof(int), kDeviceToHost, __FILE__, __LINE__);
  EXPECT_EQ(42, host);
  RTNAME(CUFMemFree)(dev, kMemTypeDevice, __FILE__, __LINE__);
}
Loading