Skip to content

[SYCL] Implement device_global host-side memory operations #8022

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions sycl/include/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class CG {
Copy2DUSM = 16,
Fill2DUSM = 17,
Memset2DUSM = 18,
CopyToDeviceGlobal = 19,
CopyFromDeviceGlobal = 20,
};

CG(CGTYPE Type, std::vector<std::vector<char>> ArgsStorage,
Expand Down Expand Up @@ -490,6 +492,74 @@ class CGMemset2DUSM : public CG {
char getValue() const { return MValue; }
};

/// "Copy to device_global" command group class.
class CGCopyToDeviceGlobal : public CG {
void *MSrc;
void *MDeviceGlobalPtr;
bool MIsDeviceImageScoped;
size_t MNumBytes;
size_t MOffset;
detail::OSModuleHandle MOSModuleHandle;

public:
CGCopyToDeviceGlobal(
void *Src, void *DeviceGlobalPtr, bool IsDeviceImageScoped,
size_t NumBytes, size_t Offset,
std::vector<std::vector<char>> ArgsStorage,
std::vector<detail::AccessorImplPtr> AccStorage,
std::vector<std::shared_ptr<const void>> SharedPtrStorage,
std::vector<AccessorImplHost *> Requirements,
std::vector<detail::EventImplPtr> Events,
detail::OSModuleHandle OSModuleHandle, detail::code_location loc = {})
: CG(CopyToDeviceGlobal, std::move(ArgsStorage), std::move(AccStorage),
std::move(SharedPtrStorage), std::move(Requirements),
std::move(Events), std::move(loc)),
MSrc(Src), MDeviceGlobalPtr(DeviceGlobalPtr),
MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes),
MOffset(Offset), MOSModuleHandle(OSModuleHandle) {}

void *getSrc() { return MSrc; }
void *getDeviceGlobalPtr() { return MDeviceGlobalPtr; }
bool isDeviceImageScoped() { return MIsDeviceImageScoped; }
size_t getNumBytes() { return MNumBytes; }
size_t getOffset() { return MOffset; }
detail::OSModuleHandle getOSModuleHandle() { return MOSModuleHandle; }
};

/// "Copy to device_global" command group class.
class CGCopyFromDeviceGlobal : public CG {
void *MDeviceGlobalPtr;
void *MDest;
bool MIsDeviceImageScoped;
size_t MNumBytes;
size_t MOffset;
detail::OSModuleHandle MOSModuleHandle;

public:
CGCopyFromDeviceGlobal(
void *DeviceGlobalPtr, void *Dest, bool IsDeviceImageScoped,
size_t NumBytes, size_t Offset,
std::vector<std::vector<char>> ArgsStorage,
std::vector<detail::AccessorImplPtr> AccStorage,
std::vector<std::shared_ptr<const void>> SharedPtrStorage,
std::vector<AccessorImplHost *> Requirements,
std::vector<detail::EventImplPtr> Events,
detail::OSModuleHandle OSModuleHandle, detail::code_location loc = {})
: CG(CopyFromDeviceGlobal, std::move(ArgsStorage), std::move(AccStorage),
std::move(SharedPtrStorage), std::move(Requirements),
std::move(Events), std::move(loc)),
MDeviceGlobalPtr(DeviceGlobalPtr), MDest(Dest),
MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes),
MOffset(Offset), MOSModuleHandle(OSModuleHandle) {}

void *getDeviceGlobalPtr() { return MDeviceGlobalPtr; }
void *getDest() { return MDest; }
bool isDeviceImageScoped() { return MIsDeviceImageScoped; }
size_t getNumBytes() { return MNumBytes; }
size_t getOffset() { return MOffset; }
detail::OSModuleHandle getOSModuleHandle() { return MOSModuleHandle; }
};

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
96 changes: 96 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <sycl/detail/handler_proxy.hpp>
#include <sycl/detail/os_util.hpp>
#include <sycl/event.hpp>
#include <sycl/ext/oneapi/device_global/device_global.hpp>
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/ext/oneapi/properties/property.hpp>
Expand Down Expand Up @@ -2576,6 +2577,91 @@ class __SYCL_EXPORT handler {
commonUSMFill2DFallbackKernel(Dest, DestPitch, Pattern, Width, Height);
}

/// Copies data from a USM memory region to a device_global.
/// Throws an exception if the copy operation intends to write outside the
/// memory range \param Dest, as specified through \param NumBytes and
/// \param DestOffset.
///
/// \param Dest is the destination device_glboal.
/// \param Src is a USM pointer to the source memory.
/// \param NumBytes is a number of bytes to copy.
/// \param DestOffset is the offset into \param Dest to copy to.
template <typename T, typename PropertyListT>
void memcpy(ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
const void *Src, size_t NumBytes = sizeof(T),
size_t DestOffset = 0) {
if (sizeof(T) < DestOffset + NumBytes)
throw sycl::exception(make_error_code(errc::invalid),
"Copy to device_global is out of bounds.");

constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
ext::oneapi::experimental::device_image_scope_key>();
memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes, DestOffset);
}

/// Copies data from a device_global to USM memory.
/// Throws an exception if the copy operation intends to read outside the
/// memory range \param Src, as specified through \param NumBytes and
/// \param SrcOffset.
///
/// \param Dest is a USM pointer to copy to.
/// \param Src is the source device_global.
/// \param NumBytes is a number of bytes to copy.
/// \param SrcOffset is the offset into \param Src to copy from.
template <typename T, typename PropertyListT>
void
memcpy(void *Dest,
const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
size_t NumBytes = sizeof(T), size_t SrcOffset = 0) {
if (sizeof(T) < SrcOffset + NumBytes)
throw sycl::exception(make_error_code(errc::invalid),
"Copy from device_global is out of bounds.");

constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
ext::oneapi::experimental::device_image_scope_key>();
memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
SrcOffset);
}

/// Copies elements of type `std::remove_all_extents_t<T>` from a USM memory
/// region to a device_global.
/// Throws an exception if the copy operation intends to write outside the
/// memory range \param Dest, as specified through \param Count and
/// \param StartIndex.
///
/// \param Src is a USM pointer to the source memory.
/// \param Dest is the destination device_glboal.
/// \param Count is a number of elements to copy.
/// \param StartIndex is the index of the first element in Dest to copy to.
template <typename T, typename PropertyListT>
void copy(const std::remove_all_extents_t<T> *Src,
ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
size_t StartIndex = 0) {
this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
StartIndex * sizeof(std::remove_all_extents_t<T>));
}

/// Copies elements of type `std::remove_all_extents_t<T>` from a
/// device_global to a USM memory region.
/// Throws an exception if the copy operation intends to write outside the
/// memory range \param Src, as specified through \param Count and
/// \param StartIndex.
///
/// \param Src is the source device_global.
/// \param Dest is a USM pointer to copy to.
/// \param Count is a number of elements to copy.
/// \param StartIndex is the index of the first element in Src to copy from.
template <typename T, typename PropertyListT>
void
copy(const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
std::remove_all_extents_t<T> *Dest,
size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
size_t StartIndex = 0) {
this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
StartIndex * sizeof(std::remove_all_extents_t<T>));
}

private:
std::shared_ptr<detail::handler_impl> MImpl;
std::shared_ptr<detail::queue_impl> MQueue;
Expand Down Expand Up @@ -2774,6 +2860,16 @@ class __SYCL_EXPORT handler {
// Implementation of ext_oneapi_memset2d using command for native 2D memset.
void ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value,
size_t Width, size_t Height);

// Implementation of memcpy to device_global.
void memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src,
bool IsDeviceImageScoped, size_t NumBytes,
size_t Offset);

// Implementation of memcpy from device_global.
void memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
bool IsDeviceImageScoped, size_t NumBytes,
size_t Offset);
};
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
Loading